e1000: flush packets when link comes up
[qemu/ar7.git] / hw / net / e1000.c
blob5c6bcd001491f6a50ebe09394e66b37026af5fc3
1 /*
2 * QEMU e1000 emulation
4 * Software developer's manual:
5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8 * Copyright (c) 2008 Qumranet
9 * Based on work done by:
10 * Copyright (c) 2007 Dan Aloni
11 * Copyright (c) 2004 Antony T Curtis
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
38 #include "e1000_regs.h"
/* Compile-time switch: when defined, DBGOUT() can produce trace output. */
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position within debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

/* Mask for category x, where x is given without its DEBUG_ prefix. */
#define DBGBIT(x)    (1<<DEBUG_##x)

/* Set of categories that are printed; TXERR and GENERAL by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Write an "e1000: "-prefixed message to stderr if category `what` is on. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debugging compiled out: DBGOUT() expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
#define IOPORT_SIZE                 0x40
#define PNPMMIO_SIZE                0x20000

/* Min. octets in an ethernet frame sans FCS */
#define MIN_BUF_SIZE                60

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE  1522

/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE   16384

/* 14 + 4 octets; used to size the header copy handed to the RX filter. */
#define MAXIMUM_ETHERNET_HDR_LEN    (14+4)
/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
 *  Others never tested
 */
79 typedef struct E1000State_st {
80 /*< private >*/
81 PCIDevice parent_obj;
82 /*< public >*/
84 NICState *nic;
85 NICConf conf;
86 MemoryRegion mmio;
87 MemoryRegion io;
89 uint32_t mac_reg[0x8000];
90 uint16_t phy_reg[0x20];
91 uint16_t eeprom_data[64];
93 uint32_t rxbuf_size;
94 uint32_t rxbuf_min_shift;
95 struct e1000_tx {
96 unsigned char header[256];
97 unsigned char vlan_header[4];
98 /* Fields vlan and data must not be reordered or separated. */
99 unsigned char vlan[4];
100 unsigned char data[0x10000];
101 uint16_t size;
102 unsigned char sum_needed;
103 unsigned char vlan_needed;
104 uint8_t ipcss;
105 uint8_t ipcso;
106 uint16_t ipcse;
107 uint8_t tucss;
108 uint8_t tucso;
109 uint16_t tucse;
110 uint8_t hdr_len;
111 uint16_t mss;
112 uint32_t paylen;
113 uint16_t tso_frames;
114 char tse;
115 int8_t ip;
116 int8_t tcp;
117 char cptse; // current packet tse bit
118 } tx;
120 struct {
121 uint32_t val_in; // shifted in from guest driver
122 uint16_t bitnum_in;
123 uint16_t bitnum_out;
124 uint16_t reading;
125 uint32_t old_eecd;
126 } eecd_state;
128 QEMUTimer *autoneg_timer;
130 QEMUTimer *mit_timer; /* Mitigation timer. */
131 bool mit_timer_on; /* Mitigation timer is running. */
132 bool mit_irq_level; /* Tracks interrupt pin level. */
133 uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */
135 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
136 #define E1000_FLAG_AUTONEG_BIT 0
137 #define E1000_FLAG_MIT_BIT 1
138 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
139 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
140 uint32_t compat_flags;
141 } E1000State;
143 typedef struct E1000BaseClass {
144 PCIDeviceClass parent_class;
145 uint16_t phy_id2;
146 } E1000BaseClass;
148 #define TYPE_E1000_BASE "e1000-base"
150 #define E1000(obj) \
151 OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
153 #define E1000_DEVICE_CLASS(klass) \
154 OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
155 #define E1000_DEVICE_GET_CLASS(obj) \
156 OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
158 #define defreg(x) x = (E1000_##x>>2)
159 enum {
160 defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
161 defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
162 defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
163 defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH),
164 defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT),
165 defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
166 defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
167 defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
168 defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
169 defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
170 defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
171 defreg(ITR),
174 static void
175 e1000_link_down(E1000State *s)
177 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
178 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
179 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
180 s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
183 static void
184 e1000_link_up(E1000State *s)
186 s->mac_reg[STATUS] |= E1000_STATUS_LU;
187 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
189 /* E1000_STATUS_LU is tested by e1000_can_receive() */
190 qemu_flush_queued_packets(qemu_get_queue(s->nic));
193 static bool
194 have_autoneg(E1000State *s)
196 return (s->compat_flags & E1000_FLAG_AUTONEG) &&
197 (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
200 static void
201 set_phy_ctrl(E1000State *s, int index, uint16_t val)
203 /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
204 s->phy_reg[PHY_CTRL] = val & ~(0x3f |
205 MII_CR_RESET |
206 MII_CR_RESTART_AUTO_NEG);
209 * QEMU 1.3 does not support link auto-negotiation emulation, so if we
210 * migrate during auto negotiation, after migration the link will be
211 * down.
213 if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
214 e1000_link_down(s);
215 DBGOUT(PHY, "Start link auto negotiation\n");
216 timer_mod(s->autoneg_timer,
217 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
221 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
222 [PHY_CTRL] = set_phy_ctrl,
225 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
227 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
228 static const char phy_regcap[0x20] = {
229 [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
230 [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
231 [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW,
232 [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R,
233 [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
234 [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R,
235 [PHY_AUTONEG_EXP] = PHY_R,
238 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
239 static const uint16_t phy_reg_init[] = {
240 [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB |
241 MII_CR_FULL_DUPLEX |
242 MII_CR_AUTO_NEG_EN,
244 [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
245 MII_SR_LINK_STATUS | /* link initially up */
246 MII_SR_AUTONEG_CAPS |
247 /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
248 MII_SR_PREAMBLE_SUPPRESS |
249 MII_SR_EXTENDED_STATUS |
250 MII_SR_10T_HD_CAPS |
251 MII_SR_10T_FD_CAPS |
252 MII_SR_100X_HD_CAPS |
253 MII_SR_100X_FD_CAPS,
255 [PHY_ID1] = 0x141,
256 /* [PHY_ID2] configured per DevId, from e1000_reset() */
257 [PHY_AUTONEG_ADV] = 0xde1,
258 [PHY_LP_ABILITY] = 0x1e0,
259 [PHY_1000T_CTRL] = 0x0e00,
260 [PHY_1000T_STATUS] = 0x3c00,
261 [M88E1000_PHY_SPEC_CTRL] = 0x360,
262 [M88E1000_PHY_SPEC_STATUS] = 0xac00,
263 [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
266 static const uint32_t mac_reg_init[] = {
267 [PBA] = 0x00100030,
268 [LEDCTL] = 0x602,
269 [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
270 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
271 [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
272 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
273 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
274 E1000_STATUS_LU,
275 [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
276 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
277 E1000_MANC_RMCP_EN,
/*
 * Fold a candidate delay into *curr, keeping the smallest non-zero value.
 * A *curr of 0 means "not set yet"; a value of 0 is ignored.
 */
static inline void mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
289 static void
290 set_interrupt_cause(E1000State *s, int index, uint32_t val)
292 PCIDevice *d = PCI_DEVICE(s);
293 uint32_t pending_ints;
294 uint32_t mit_delay;
296 s->mac_reg[ICR] = val;
299 * Make sure ICR and ICS registers have the same value.
300 * The spec says that the ICS register is write-only. However in practice,
301 * on real hardware ICS is readable, and for reads it has the same value as
302 * ICR (except that ICS does not have the clear on read behaviour of ICR).
304 * The VxWorks PRO/1000 driver uses this behaviour.
306 s->mac_reg[ICS] = val;
308 pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
309 if (!s->mit_irq_level && pending_ints) {
311 * Here we detect a potential raising edge. We postpone raising the
312 * interrupt line if we are inside the mitigation delay window
313 * (s->mit_timer_on == 1).
314 * We provide a partial implementation of interrupt mitigation,
315 * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
316 * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
317 * RADV; relative timers based on TIDV and RDTR are not implemented.
319 if (s->mit_timer_on) {
320 return;
322 if (s->compat_flags & E1000_FLAG_MIT) {
323 /* Compute the next mitigation delay according to pending
324 * interrupts and the current values of RADV (provided
325 * RDTR!=0), TADV and ITR.
326 * Then rearm the timer.
328 mit_delay = 0;
329 if (s->mit_ide &&
330 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
331 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
333 if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
334 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
336 mit_update_delay(&mit_delay, s->mac_reg[ITR]);
338 if (mit_delay) {
339 s->mit_timer_on = 1;
340 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
341 mit_delay * 256);
343 s->mit_ide = 0;
347 s->mit_irq_level = (pending_ints != 0);
348 pci_set_irq(d, s->mit_irq_level);
351 static void
352 e1000_mit_timer(void *opaque)
354 E1000State *s = opaque;
356 s->mit_timer_on = 0;
357 /* Call set_interrupt_cause to update the irq level (if necessary). */
358 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
361 static void
362 set_ics(E1000State *s, int index, uint32_t val)
364 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
365 s->mac_reg[IMS]);
366 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
369 static void
370 e1000_autoneg_timer(void *opaque)
372 E1000State *s = opaque;
373 if (!qemu_get_queue(s->nic)->link_down) {
374 e1000_link_up(s);
375 s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
376 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
377 DBGOUT(PHY, "Auto negotiation is completed\n");
378 set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
382 static int
383 rxbufsize(uint32_t v)
385 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
386 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
387 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
388 switch (v) {
389 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
390 return 16384;
391 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
392 return 8192;
393 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
394 return 4096;
395 case E1000_RCTL_SZ_1024:
396 return 1024;
397 case E1000_RCTL_SZ_512:
398 return 512;
399 case E1000_RCTL_SZ_256:
400 return 256;
402 return 2048;
405 static void e1000_reset(void *opaque)
407 E1000State *d = opaque;
408 E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
409 uint8_t *macaddr = d->conf.macaddr.a;
410 int i;
412 timer_del(d->autoneg_timer);
413 timer_del(d->mit_timer);
414 d->mit_timer_on = 0;
415 d->mit_irq_level = 0;
416 d->mit_ide = 0;
417 memset(d->phy_reg, 0, sizeof d->phy_reg);
418 memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
419 d->phy_reg[PHY_ID2] = edc->phy_id2;
420 memset(d->mac_reg, 0, sizeof d->mac_reg);
421 memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
422 d->rxbuf_min_shift = 1;
423 memset(&d->tx, 0, sizeof d->tx);
425 if (qemu_get_queue(d->nic)->link_down) {
426 e1000_link_down(d);
429 /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
430 d->mac_reg[RA] = 0;
431 d->mac_reg[RA + 1] = E1000_RAH_AV;
432 for (i = 0; i < 4; i++) {
433 d->mac_reg[RA] |= macaddr[i] << (8 * i);
434 d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
436 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
439 static void
440 set_ctrl(E1000State *s, int index, uint32_t val)
442 /* RST is self clearing */
443 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
446 static void
447 set_rx_control(E1000State *s, int index, uint32_t val)
449 s->mac_reg[RCTL] = val;
450 s->rxbuf_size = rxbufsize(val);
451 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
452 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
453 s->mac_reg[RCTL]);
454 qemu_flush_queued_packets(qemu_get_queue(s->nic));
457 static void
458 set_mdic(E1000State *s, int index, uint32_t val)
460 uint32_t data = val & E1000_MDIC_DATA_MASK;
461 uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
463 if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
464 val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
465 else if (val & E1000_MDIC_OP_READ) {
466 DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
467 if (!(phy_regcap[addr] & PHY_R)) {
468 DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
469 val |= E1000_MDIC_ERROR;
470 } else
471 val = (val ^ data) | s->phy_reg[addr];
472 } else if (val & E1000_MDIC_OP_WRITE) {
473 DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
474 if (!(phy_regcap[addr] & PHY_W)) {
475 DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
476 val |= E1000_MDIC_ERROR;
477 } else {
478 if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
479 phyreg_writeops[addr](s, index, data);
480 } else {
481 s->phy_reg[addr] = data;
485 s->mac_reg[MDIC] = val | E1000_MDIC_READY;
487 if (val & E1000_MDIC_INT_EN) {
488 set_ics(s, 0, E1000_ICR_MDAC);
492 static uint32_t
493 get_eecd(E1000State *s, int index)
495 uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
497 DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
498 s->eecd_state.bitnum_out, s->eecd_state.reading);
499 if (!s->eecd_state.reading ||
500 ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
501 ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
502 ret |= E1000_EECD_DO;
503 return ret;
506 static void
507 set_eecd(E1000State *s, int index, uint32_t val)
509 uint32_t oldval = s->eecd_state.old_eecd;
511 s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
512 E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
513 if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do
514 return;
515 if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state
516 s->eecd_state.val_in = 0;
517 s->eecd_state.bitnum_in = 0;
518 s->eecd_state.bitnum_out = 0;
519 s->eecd_state.reading = 0;
521 if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge
522 return;
523 if (!(E1000_EECD_SK & val)) { // falling edge
524 s->eecd_state.bitnum_out++;
525 return;
527 s->eecd_state.val_in <<= 1;
528 if (val & E1000_EECD_DI)
529 s->eecd_state.val_in |= 1;
530 if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
531 s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
532 s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
533 EEPROM_READ_OPCODE_MICROWIRE);
535 DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
536 s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
537 s->eecd_state.reading);
540 static uint32_t
541 flash_eerd_read(E1000State *s, int x)
543 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
545 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
546 return (s->mac_reg[EERD]);
548 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
549 return (E1000_EEPROM_RW_REG_DONE | r);
551 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
552 E1000_EEPROM_RW_REG_DONE | r);
/*
 * Compute a checksum over data[css .. n-1] and store it big-endian at
 * data[sloc].  A non-zero cse smaller than n shortens the range to end at
 * cse inclusive.  Nothing is written unless sloc < n - 1.
 */
static void putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css,
                   uint32_t cse)
{
    uint32_t end = (cse && cse < n) ? cse + 1 : n;
    uint32_t sum;

    if (sloc >= end - 1) {
        return;
    }
    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish(sum));
}
568 static inline int
569 vlan_enabled(E1000State *s)
571 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
574 static inline int
575 vlan_rx_filter_enabled(E1000State *s)
577 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
580 static inline int
581 is_vlan_packet(E1000State *s, const uint8_t *buf)
583 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
584 le16_to_cpu(s->mac_reg[VET]));
587 static inline int
588 is_vlan_txd(uint32_t txd_lower)
590 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
593 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
594 * fill it in, just pad descriptor length by 4 bytes unless guest
595 * told us to strip it off the packet. */
596 static inline int
597 fcs_len(E1000State *s)
599 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
602 static void
603 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
605 NetClientState *nc = qemu_get_queue(s->nic);
606 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
607 nc->info->receive(nc, buf, size);
608 } else {
609 qemu_send_packet(nc, buf, size);
613 static void
614 xmit_seg(E1000State *s)
616 uint16_t len, *sp;
617 unsigned int frames = s->tx.tso_frames, css, sofar, n;
618 struct e1000_tx *tp = &s->tx;
620 if (tp->tse && tp->cptse) {
621 css = tp->ipcss;
622 DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
623 frames, tp->size, css);
624 if (tp->ip) { // IPv4
625 stw_be_p(tp->data+css+2, tp->size - css);
626 stw_be_p(tp->data+css+4,
627 be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
628 } else // IPv6
629 stw_be_p(tp->data+css+4, tp->size - css);
630 css = tp->tucss;
631 len = tp->size - css;
632 DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
633 if (tp->tcp) {
634 sofar = frames * tp->mss;
635 stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
636 if (tp->paylen - sofar > tp->mss)
637 tp->data[css + 13] &= ~9; // PSH, FIN
638 } else // UDP
639 stw_be_p(tp->data+css+4, len);
640 if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
641 unsigned int phsum;
642 // add pseudo-header length before checksum calculation
643 sp = (uint16_t *)(tp->data + tp->tucso);
644 phsum = be16_to_cpup(sp) + len;
645 phsum = (phsum >> 16) + (phsum & 0xffff);
646 stw_be_p(sp, phsum);
648 tp->tso_frames++;
651 if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
652 putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
653 if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
654 putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
655 if (tp->vlan_needed) {
656 memmove(tp->vlan, tp->data, 4);
657 memmove(tp->data, tp->data + 4, 8);
658 memcpy(tp->data + 8, tp->vlan_header, 4);
659 e1000_send_packet(s, tp->vlan, tp->size + 4);
660 } else
661 e1000_send_packet(s, tp->data, tp->size);
662 s->mac_reg[TPT]++;
663 s->mac_reg[GPTC]++;
664 n = s->mac_reg[TOTL];
665 if ((s->mac_reg[TOTL] += s->tx.size) < n)
666 s->mac_reg[TOTH]++;
669 static void
670 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
672 PCIDevice *d = PCI_DEVICE(s);
673 uint32_t txd_lower = le32_to_cpu(dp->lower.data);
674 uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
675 unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
676 unsigned int msh = 0xfffff;
677 uint64_t addr;
678 struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
679 struct e1000_tx *tp = &s->tx;
681 s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
682 if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor
683 op = le32_to_cpu(xp->cmd_and_length);
684 tp->ipcss = xp->lower_setup.ip_fields.ipcss;
685 tp->ipcso = xp->lower_setup.ip_fields.ipcso;
686 tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
687 tp->tucss = xp->upper_setup.tcp_fields.tucss;
688 tp->tucso = xp->upper_setup.tcp_fields.tucso;
689 tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
690 tp->paylen = op & 0xfffff;
691 tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
692 tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
693 tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
694 tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
695 tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
696 tp->tso_frames = 0;
697 if (tp->tucso == 0) { // this is probably wrong
698 DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
699 tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
701 return;
702 } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
703 // data descriptor
704 if (tp->size == 0) {
705 tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
707 tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
708 } else {
709 // legacy descriptor
710 tp->cptse = 0;
713 if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
714 (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
715 tp->vlan_needed = 1;
716 stw_be_p(tp->vlan_header,
717 le16_to_cpu(s->mac_reg[VET]));
718 stw_be_p(tp->vlan_header + 2,
719 le16_to_cpu(dp->upper.fields.special));
722 addr = le64_to_cpu(dp->buffer_addr);
723 if (tp->tse && tp->cptse) {
724 msh = tp->hdr_len + tp->mss;
725 do {
726 bytes = split_size;
727 if (tp->size + bytes > msh)
728 bytes = msh - tp->size;
730 bytes = MIN(sizeof(tp->data) - tp->size, bytes);
731 pci_dma_read(d, addr, tp->data + tp->size, bytes);
732 sz = tp->size + bytes;
733 if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
734 memmove(tp->header, tp->data, tp->hdr_len);
736 tp->size = sz;
737 addr += bytes;
738 if (sz == msh) {
739 xmit_seg(s);
740 memmove(tp->data, tp->header, tp->hdr_len);
741 tp->size = tp->hdr_len;
743 } while (split_size -= bytes);
744 } else if (!tp->tse && tp->cptse) {
745 // context descriptor TSE is not set, while data descriptor TSE is set
746 DBGOUT(TXERR, "TCP segmentation error\n");
747 } else {
748 split_size = MIN(sizeof(tp->data) - tp->size, split_size);
749 pci_dma_read(d, addr, tp->data + tp->size, split_size);
750 tp->size += split_size;
753 if (!(txd_lower & E1000_TXD_CMD_EOP))
754 return;
755 if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
756 xmit_seg(s);
758 tp->tso_frames = 0;
759 tp->sum_needed = 0;
760 tp->vlan_needed = 0;
761 tp->size = 0;
762 tp->cptse = 0;
765 static uint32_t
766 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
768 PCIDevice *d = PCI_DEVICE(s);
769 uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
771 if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
772 return 0;
773 txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
774 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
775 dp->upper.data = cpu_to_le32(txd_upper);
776 pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
777 &dp->upper, sizeof(dp->upper));
778 return E1000_ICR_TXDW;
781 static uint64_t tx_desc_base(E1000State *s)
783 uint64_t bah = s->mac_reg[TDBAH];
784 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
786 return (bah << 32) + bal;
789 static void
790 start_xmit(E1000State *s)
792 PCIDevice *d = PCI_DEVICE(s);
793 dma_addr_t base;
794 struct e1000_tx_desc desc;
795 uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
797 if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
798 DBGOUT(TX, "tx disabled\n");
799 return;
802 while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
803 base = tx_desc_base(s) +
804 sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
805 pci_dma_read(d, base, &desc, sizeof(desc));
807 DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
808 (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
809 desc.upper.data);
811 process_tx_desc(s, &desc);
812 cause |= txdesc_writeback(s, base, &desc);
814 if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
815 s->mac_reg[TDH] = 0;
817 * the following could happen only if guest sw assigns
818 * bogus values to TDT/TDLEN.
819 * there's nothing too intelligent we could do about this.
821 if (s->mac_reg[TDH] == tdh_start) {
822 DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
823 tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
824 break;
827 set_ics(s, 0, cause);
830 static int
831 receive_filter(E1000State *s, const uint8_t *buf, int size)
833 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
834 static const int mta_shift[] = {4, 3, 2, 0};
835 uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
837 if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
838 uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
839 uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
840 ((vid >> 5) & 0x7f));
841 if ((vfta & (1 << (vid & 0x1f))) == 0)
842 return 0;
845 if (rctl & E1000_RCTL_UPE) // promiscuous
846 return 1;
848 if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast
849 return 1;
851 if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
852 return 1;
854 for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
855 if (!(rp[1] & E1000_RAH_AV))
856 continue;
857 ra[0] = cpu_to_le32(rp[0]);
858 ra[1] = cpu_to_le32(rp[1]);
859 if (!memcmp(buf, (uint8_t *)ra, 6)) {
860 DBGOUT(RXFILTER,
861 "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
862 (int)(rp - s->mac_reg - RA)/2,
863 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
864 return 1;
867 DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
868 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
870 f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
871 f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
872 if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
873 return 1;
874 DBGOUT(RXFILTER,
875 "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
876 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
877 (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
878 s->mac_reg[MTA + (f >> 5)]);
880 return 0;
883 static void
884 e1000_set_link_status(NetClientState *nc)
886 E1000State *s = qemu_get_nic_opaque(nc);
887 uint32_t old_status = s->mac_reg[STATUS];
889 if (nc->link_down) {
890 e1000_link_down(s);
891 } else {
892 if (have_autoneg(s) &&
893 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
894 /* emulate auto-negotiation if supported */
895 timer_mod(s->autoneg_timer,
896 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
897 } else {
898 e1000_link_up(s);
902 if (s->mac_reg[STATUS] != old_status)
903 set_ics(s, 0, E1000_ICR_LSC);
906 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
908 int bufs;
909 /* Fast-path short packets */
910 if (total_size <= s->rxbuf_size) {
911 return s->mac_reg[RDH] != s->mac_reg[RDT];
913 if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
914 bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
915 } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
916 bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
917 s->mac_reg[RDT] - s->mac_reg[RDH];
918 } else {
919 return false;
921 return total_size <= bufs * s->rxbuf_size;
924 static int
925 e1000_can_receive(NetClientState *nc)
927 E1000State *s = qemu_get_nic_opaque(nc);
929 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
930 (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
931 (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
932 e1000_has_rxbufs(s, 1);
935 static uint64_t rx_desc_base(E1000State *s)
937 uint64_t bah = s->mac_reg[RDBAH];
938 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
940 return (bah << 32) + bal;
943 static ssize_t
944 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
946 E1000State *s = qemu_get_nic_opaque(nc);
947 PCIDevice *d = PCI_DEVICE(s);
948 struct e1000_rx_desc desc;
949 dma_addr_t base;
950 unsigned int n, rdt;
951 uint32_t rdh_start;
952 uint16_t vlan_special = 0;
953 uint8_t vlan_status = 0;
954 uint8_t min_buf[MIN_BUF_SIZE];
955 struct iovec min_iov;
956 uint8_t *filter_buf = iov->iov_base;
957 size_t size = iov_size(iov, iovcnt);
958 size_t iov_ofs = 0;
959 size_t desc_offset;
960 size_t desc_size;
961 size_t total_size;
963 if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
964 return -1;
967 if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
968 return -1;
971 /* Pad to minimum Ethernet frame length */
972 if (size < sizeof(min_buf)) {
973 iov_to_buf(iov, iovcnt, 0, min_buf, size);
974 memset(&min_buf[size], 0, sizeof(min_buf) - size);
975 min_iov.iov_base = filter_buf = min_buf;
976 min_iov.iov_len = size = sizeof(min_buf);
977 iovcnt = 1;
978 iov = &min_iov;
979 } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
980 /* This is very unlikely, but may happen. */
981 iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
982 filter_buf = min_buf;
985 /* Discard oversized packets if !LPE and !SBP. */
986 if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
987 (size > MAXIMUM_ETHERNET_VLAN_SIZE
988 && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
989 && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
990 return size;
993 if (!receive_filter(s, filter_buf, size)) {
994 return size;
997 if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
998 vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
999 + 14)));
1000 iov_ofs = 4;
1001 if (filter_buf == iov->iov_base) {
1002 memmove(filter_buf + 4, filter_buf, 12);
1003 } else {
1004 iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1005 while (iov->iov_len <= iov_ofs) {
1006 iov_ofs -= iov->iov_len;
1007 iov++;
1010 vlan_status = E1000_RXD_STAT_VP;
1011 size -= 4;
1014 rdh_start = s->mac_reg[RDH];
1015 desc_offset = 0;
1016 total_size = size + fcs_len(s);
1017 if (!e1000_has_rxbufs(s, total_size)) {
1018 set_ics(s, 0, E1000_ICS_RXO);
1019 return -1;
1021 do {
1022 desc_size = total_size - desc_offset;
1023 if (desc_size > s->rxbuf_size) {
1024 desc_size = s->rxbuf_size;
1026 base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1027 pci_dma_read(d, base, &desc, sizeof(desc));
1028 desc.special = vlan_special;
1029 desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1030 if (desc.buffer_addr) {
1031 if (desc_offset < size) {
1032 size_t iov_copy;
1033 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1034 size_t copy_size = size - desc_offset;
1035 if (copy_size > s->rxbuf_size) {
1036 copy_size = s->rxbuf_size;
1038 do {
1039 iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1040 pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1041 copy_size -= iov_copy;
1042 ba += iov_copy;
1043 iov_ofs += iov_copy;
1044 if (iov_ofs == iov->iov_len) {
1045 iov++;
1046 iov_ofs = 0;
1048 } while (copy_size);
1050 desc_offset += desc_size;
1051 desc.length = cpu_to_le16(desc_size);
1052 if (desc_offset >= total_size) {
1053 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1054 } else {
1055 /* Guest zeroing out status is not a hardware requirement.
1056 Clear EOP in case guest didn't do it. */
1057 desc.status &= ~E1000_RXD_STAT_EOP;
1059 } else { // as per intel docs; skip descriptors with null buf addr
1060 DBGOUT(RX, "Null RX descriptor!!\n");
1062 pci_dma_write(d, base, &desc, sizeof(desc));
1064 if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1065 s->mac_reg[RDH] = 0;
1066 /* see comment in start_xmit; same here */
1067 if (s->mac_reg[RDH] == rdh_start) {
1068 DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1069 rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1070 set_ics(s, 0, E1000_ICS_RXO);
1071 return -1;
1073 } while (desc_offset < total_size);
1075 s->mac_reg[GPRC]++;
1076 s->mac_reg[TPR]++;
1077 /* TOR - Total Octets Received:
1078 * This register includes bytes received in a packet from the <Destination
1079 * Address> field through the <CRC> field, inclusively.
1081 n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
1082 if (n < s->mac_reg[TORL])
1083 s->mac_reg[TORH]++;
1084 s->mac_reg[TORL] = n;
1086 n = E1000_ICS_RXT0;
1087 if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1088 rdt += s->mac_reg[RDLEN] / sizeof(desc);
1089 if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1090 s->rxbuf_min_shift)
1091 n |= E1000_ICS_RXDMT0;
1093 set_ics(s, 0, n);
1095 return size;
1098 static ssize_t
1099 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1101 const struct iovec iov = {
1102 .iov_base = (uint8_t *)buf,
1103 .iov_len = size
1106 return e1000_receive_iov(nc, &iov, 1);
1109 static uint32_t
1110 mac_readreg(E1000State *s, int index)
1112 return s->mac_reg[index];
1115 static uint32_t
1116 mac_icr_read(E1000State *s, int index)
1118 uint32_t ret = s->mac_reg[ICR];
1120 DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1121 set_interrupt_cause(s, 0, 0);
1122 return ret;
1125 static uint32_t
1126 mac_read_clr4(E1000State *s, int index)
1128 uint32_t ret = s->mac_reg[index];
1130 s->mac_reg[index] = 0;
1131 return ret;
1134 static uint32_t
1135 mac_read_clr8(E1000State *s, int index)
1137 uint32_t ret = s->mac_reg[index];
1139 s->mac_reg[index] = 0;
1140 s->mac_reg[index-1] = 0;
1141 return ret;
1144 static void
1145 mac_writereg(E1000State *s, int index, uint32_t val)
1147 uint32_t macaddr[2];
1149 s->mac_reg[index] = val;
1151 if (index == RA + 1) {
1152 macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1153 macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1154 qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1158 static void
1159 set_rdt(E1000State *s, int index, uint32_t val)
1161 s->mac_reg[index] = val & 0xffff;
1162 if (e1000_has_rxbufs(s, 1)) {
1163 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1167 static void
1168 set_16bit(E1000State *s, int index, uint32_t val)
1170 s->mac_reg[index] = val & 0xffff;
1173 static void
1174 set_dlen(E1000State *s, int index, uint32_t val)
1176 s->mac_reg[index] = val & 0xfff80;
1179 static void
1180 set_tctl(E1000State *s, int index, uint32_t val)
1182 s->mac_reg[index] = val;
1183 s->mac_reg[TDT] &= 0xffff;
1184 start_xmit(s);
1187 static void
1188 set_icr(E1000State *s, int index, uint32_t val)
1190 DBGOUT(INTERRUPT, "set_icr %x\n", val);
1191 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1194 static void
1195 set_imc(E1000State *s, int index, uint32_t val)
1197 s->mac_reg[IMS] &= ~val;
1198 set_ics(s, 0, 0);
1201 static void
1202 set_ims(E1000State *s, int index, uint32_t val)
1204 s->mac_reg[IMS] |= val;
1205 set_ics(s, 0, 0);
1208 #define getreg(x) [x] = mac_readreg
1209 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1210 getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL),
1211 getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL),
1212 getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS),
1213 getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL),
1214 getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
1215 getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
1216 getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV),
1217 getreg(TADV), getreg(ITR),
1219 [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
1220 [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
1221 [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read,
1222 [CRCERRS ... MPC] = &mac_readreg,
1223 [RA ... RA+31] = &mac_readreg,
1224 [MTA ... MTA+127] = &mac_readreg,
1225 [VFTA ... VFTA+127] = &mac_readreg,
1227 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1229 #define putreg(x) [x] = mac_writereg
1230 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1231 putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
1232 putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
1233 putreg(RDBAL), putreg(LEDCTL), putreg(VET),
1234 [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
1235 [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
1236 [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
1237 [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
1238 [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
1239 [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
1240 [ITR] = set_16bit,
1241 [RA ... RA+31] = &mac_writereg,
1242 [MTA ... MTA+127] = &mac_writereg,
1243 [VFTA ... VFTA+127] = &mac_writereg,
1246 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1248 static void
1249 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1250 unsigned size)
1252 E1000State *s = opaque;
1253 unsigned int index = (addr & 0x1ffff) >> 2;
1255 if (index < NWRITEOPS && macreg_writeops[index]) {
1256 macreg_writeops[index](s, index, val);
1257 } else if (index < NREADOPS && macreg_readops[index]) {
1258 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1259 } else {
1260 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1261 index<<2, val);
1265 static uint64_t
1266 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1268 E1000State *s = opaque;
1269 unsigned int index = (addr & 0x1ffff) >> 2;
1271 if (index < NREADOPS && macreg_readops[index])
1273 return macreg_readops[index](s, index);
1275 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1276 return 0;
1279 static const MemoryRegionOps e1000_mmio_ops = {
1280 .read = e1000_mmio_read,
1281 .write = e1000_mmio_write,
1282 .endianness = DEVICE_LITTLE_ENDIAN,
1283 .impl = {
1284 .min_access_size = 4,
1285 .max_access_size = 4,
1289 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1290 unsigned size)
1292 E1000State *s = opaque;
1294 (void)s;
1295 return 0;
1298 static void e1000_io_write(void *opaque, hwaddr addr,
1299 uint64_t val, unsigned size)
1301 E1000State *s = opaque;
1303 (void)s;
1306 static const MemoryRegionOps e1000_io_ops = {
1307 .read = e1000_io_read,
1308 .write = e1000_io_write,
1309 .endianness = DEVICE_LITTLE_ENDIAN,
/* Field-exists test for migration: true only when the incoming stream is
 * version 1.  The opaque pointer is not used. */
static bool is_version_1(void *opaque, int version_id)
{
    (void)opaque;

    if (version_id != 1) {
        return false;
    }
    return true;
}
1317 static void e1000_pre_save(void *opaque)
1319 E1000State *s = opaque;
1320 NetClientState *nc = qemu_get_queue(s->nic);
1322 /* If the mitigation timer is active, emulate a timeout now. */
1323 if (s->mit_timer_on) {
1324 e1000_mit_timer(s);
1328 * If link is down and auto-negotiation is supported and ongoing,
1329 * complete auto-negotiation immediately. This allows us to look
1330 * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1332 if (nc->link_down && have_autoneg(s)) {
1333 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1337 static int e1000_post_load(void *opaque, int version_id)
1339 E1000State *s = opaque;
1340 NetClientState *nc = qemu_get_queue(s->nic);
1342 if (!(s->compat_flags & E1000_FLAG_MIT)) {
1343 s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1344 s->mac_reg[TADV] = 0;
1345 s->mit_irq_level = false;
1347 s->mit_ide = 0;
1348 s->mit_timer_on = false;
1350 /* nc.link_down can't be migrated, so infer link_down according
1351 * to link status bit in mac_reg[STATUS].
1352 * Alternatively, restart link negotiation if it was in progress. */
1353 nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1355 if (have_autoneg(s) &&
1356 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1357 nc->link_down = false;
1358 timer_mod(s->autoneg_timer,
1359 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1362 return 0;
1365 static bool e1000_mit_state_needed(void *opaque)
1367 E1000State *s = opaque;
1369 return s->compat_flags & E1000_FLAG_MIT;
1372 static const VMStateDescription vmstate_e1000_mit_state = {
1373 .name = "e1000/mit_state",
1374 .version_id = 1,
1375 .minimum_version_id = 1,
1376 .needed = e1000_mit_state_needed,
1377 .fields = (VMStateField[]) {
1378 VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1379 VMSTATE_UINT32(mac_reg[RADV], E1000State),
1380 VMSTATE_UINT32(mac_reg[TADV], E1000State),
1381 VMSTATE_UINT32(mac_reg[ITR], E1000State),
1382 VMSTATE_BOOL(mit_irq_level, E1000State),
1383 VMSTATE_END_OF_LIST()
1387 static const VMStateDescription vmstate_e1000 = {
1388 .name = "e1000",
1389 .version_id = 2,
1390 .minimum_version_id = 1,
1391 .pre_save = e1000_pre_save,
1392 .post_load = e1000_post_load,
1393 .fields = (VMStateField[]) {
1394 VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1395 VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1396 VMSTATE_UNUSED(4), /* Was mmio_base. */
1397 VMSTATE_UINT32(rxbuf_size, E1000State),
1398 VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1399 VMSTATE_UINT32(eecd_state.val_in, E1000State),
1400 VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1401 VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1402 VMSTATE_UINT16(eecd_state.reading, E1000State),
1403 VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1404 VMSTATE_UINT8(tx.ipcss, E1000State),
1405 VMSTATE_UINT8(tx.ipcso, E1000State),
1406 VMSTATE_UINT16(tx.ipcse, E1000State),
1407 VMSTATE_UINT8(tx.tucss, E1000State),
1408 VMSTATE_UINT8(tx.tucso, E1000State),
1409 VMSTATE_UINT16(tx.tucse, E1000State),
1410 VMSTATE_UINT32(tx.paylen, E1000State),
1411 VMSTATE_UINT8(tx.hdr_len, E1000State),
1412 VMSTATE_UINT16(tx.mss, E1000State),
1413 VMSTATE_UINT16(tx.size, E1000State),
1414 VMSTATE_UINT16(tx.tso_frames, E1000State),
1415 VMSTATE_UINT8(tx.sum_needed, E1000State),
1416 VMSTATE_INT8(tx.ip, E1000State),
1417 VMSTATE_INT8(tx.tcp, E1000State),
1418 VMSTATE_BUFFER(tx.header, E1000State),
1419 VMSTATE_BUFFER(tx.data, E1000State),
1420 VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1421 VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1422 VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1423 VMSTATE_UINT32(mac_reg[EECD], E1000State),
1424 VMSTATE_UINT32(mac_reg[EERD], E1000State),
1425 VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1426 VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1427 VMSTATE_UINT32(mac_reg[ICR], E1000State),
1428 VMSTATE_UINT32(mac_reg[ICS], E1000State),
1429 VMSTATE_UINT32(mac_reg[IMC], E1000State),
1430 VMSTATE_UINT32(mac_reg[IMS], E1000State),
1431 VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1432 VMSTATE_UINT32(mac_reg[MANC], E1000State),
1433 VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1434 VMSTATE_UINT32(mac_reg[MPC], E1000State),
1435 VMSTATE_UINT32(mac_reg[PBA], E1000State),
1436 VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1437 VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1438 VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1439 VMSTATE_UINT32(mac_reg[RDH], E1000State),
1440 VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1441 VMSTATE_UINT32(mac_reg[RDT], E1000State),
1442 VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1443 VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1444 VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1445 VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1446 VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1447 VMSTATE_UINT32(mac_reg[TDH], E1000State),
1448 VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1449 VMSTATE_UINT32(mac_reg[TDT], E1000State),
1450 VMSTATE_UINT32(mac_reg[TORH], E1000State),
1451 VMSTATE_UINT32(mac_reg[TORL], E1000State),
1452 VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1453 VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1454 VMSTATE_UINT32(mac_reg[TPR], E1000State),
1455 VMSTATE_UINT32(mac_reg[TPT], E1000State),
1456 VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1457 VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1458 VMSTATE_UINT32(mac_reg[VET], E1000State),
1459 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1460 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1461 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1462 VMSTATE_END_OF_LIST()
1464 .subsections = (const VMStateDescription*[]) {
1465 &vmstate_e1000_mit_state,
1466 NULL
/*
 * Initial EEPROM image (64 16-bit words).
 * The layout is documented in Tables 5-2 and 5-3, pp. 98-102.
 * Words 11 and 13 are placeholders: pci_e1000_realize() overwrites them
 * with the device ID, and also fills in the MAC address words and the
 * checksum word.
 */
static const uint16_t e1000_eeprom_template[64] = {
    /* words  0 ..  7 */
    0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000,
    /* words  8 .. 15 */
    0x3000, 0x1000, 0x6403, 0x0000 /* DevId */,
    0x8086, 0x0000 /* DevId */, 0x8086, 0x3040,
    /* words 16 .. 23 */
    0x0008, 0x2000, 0x7e14, 0x0048, 0x1000, 0x00d8, 0x0000, 0x2700,
    /* words 24 .. 31 */
    0x6cc9, 0x3150, 0x0722, 0x040b, 0x0984, 0x0000, 0xc000, 0x0706,
    /* words 32 .. 39 */
    0x1008, 0x0000, 0x0f04, 0x7fff, 0x4d01, 0xffff, 0xffff, 0xffff,
    /* words 40 .. 47 */
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    /* words 48 .. 55 */
    0x0100, 0x4000, 0x121c, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    /* words 56 .. 63 */
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000,
};
1485 /* PCI interface */
1487 static void
1488 e1000_mmio_setup(E1000State *d)
1490 int i;
1491 const uint32_t excluded_regs[] = {
1492 E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1493 E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1496 memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1497 "e1000-mmio", PNPMMIO_SIZE);
1498 memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1499 for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1500 memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1501 excluded_regs[i+1] - excluded_regs[i] - 4);
1502 memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1505 static void
1506 pci_e1000_uninit(PCIDevice *dev)
1508 E1000State *d = E1000(dev);
1510 timer_del(d->autoneg_timer);
1511 timer_free(d->autoneg_timer);
1512 timer_del(d->mit_timer);
1513 timer_free(d->mit_timer);
1514 qemu_del_nic(d->nic);
1517 static NetClientInfo net_e1000_info = {
1518 .type = NET_CLIENT_OPTIONS_KIND_NIC,
1519 .size = sizeof(NICState),
1520 .can_receive = e1000_can_receive,
1521 .receive = e1000_receive,
1522 .receive_iov = e1000_receive_iov,
1523 .link_status_changed = e1000_set_link_status,
1526 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1527 uint32_t val, int len)
1529 E1000State *s = E1000(pci_dev);
1531 pci_default_write_config(pci_dev, address, val, len);
1533 if (range_covers_byte(address, len, PCI_COMMAND) &&
1534 (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1535 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1540 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1542 DeviceState *dev = DEVICE(pci_dev);
1543 E1000State *d = E1000(pci_dev);
1544 PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1545 uint8_t *pci_conf;
1546 uint16_t checksum = 0;
1547 int i;
1548 uint8_t *macaddr;
1550 pci_dev->config_write = e1000_write_config;
1552 pci_conf = pci_dev->config;
1554 /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1555 pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1557 pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1559 e1000_mmio_setup(d);
1561 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1563 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1565 memmove(d->eeprom_data, e1000_eeprom_template,
1566 sizeof e1000_eeprom_template);
1567 qemu_macaddr_default_if_unset(&d->conf.macaddr);
1568 macaddr = d->conf.macaddr.a;
1569 for (i = 0; i < 3; i++)
1570 d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1571 d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1572 for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1573 checksum += d->eeprom_data[i];
1574 checksum = (uint16_t) EEPROM_SUM - checksum;
1575 d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1577 d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1578 object_get_typename(OBJECT(d)), dev->id, d);
1580 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1582 d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1583 d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1586 static void qdev_e1000_reset(DeviceState *dev)
1588 E1000State *d = E1000(dev);
1589 e1000_reset(d);
1592 static Property e1000_properties[] = {
1593 DEFINE_NIC_PROPERTIES(E1000State, conf),
1594 DEFINE_PROP_BIT("autonegotiation", E1000State,
1595 compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1596 DEFINE_PROP_BIT("mitigation", E1000State,
1597 compat_flags, E1000_FLAG_MIT_BIT, true),
1598 DEFINE_PROP_END_OF_LIST(),
/* Static description of one supported device variant; consumed by
 * e1000_class_init() through the class_data pointer. */
struct E1000Info {
    const char *name;     /* QOM type name */
    uint16_t device_id;   /* PCI device ID */
    uint8_t revision;     /* PCI revision */
    uint16_t phy_id2;     /* copied into E1000BaseClass.phy_id2 */
};

typedef struct E1000Info E1000Info;
1608 static void e1000_class_init(ObjectClass *klass, void *data)
1610 DeviceClass *dc = DEVICE_CLASS(klass);
1611 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1612 E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1613 const E1000Info *info = data;
1615 k->realize = pci_e1000_realize;
1616 k->exit = pci_e1000_uninit;
1617 k->romfile = "efi-e1000.rom";
1618 k->vendor_id = PCI_VENDOR_ID_INTEL;
1619 k->device_id = info->device_id;
1620 k->revision = info->revision;
1621 e->phy_id2 = info->phy_id2;
1622 k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1623 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1624 dc->desc = "Intel Gigabit Ethernet";
1625 dc->reset = qdev_e1000_reset;
1626 dc->vmsd = &vmstate_e1000;
1627 dc->props = e1000_properties;
1630 static void e1000_instance_init(Object *obj)
1632 E1000State *n = E1000(obj);
1633 device_add_bootindex_property(obj, &n->conf.bootindex,
1634 "bootindex", "/ethernet-phy@0",
1635 DEVICE(n), NULL);
1638 static const TypeInfo e1000_base_info = {
1639 .name = TYPE_E1000_BASE,
1640 .parent = TYPE_PCI_DEVICE,
1641 .instance_size = sizeof(E1000State),
1642 .instance_init = e1000_instance_init,
1643 .class_size = sizeof(E1000BaseClass),
1644 .abstract = true,
1647 static const E1000Info e1000_devices[] = {
1649 .name = "e1000-82540em",
1650 .device_id = E1000_DEV_ID_82540EM,
1651 .revision = 0x03,
1652 .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
1655 .name = "e1000-82544gc",
1656 .device_id = E1000_DEV_ID_82544GC_COPPER,
1657 .revision = 0x03,
1658 .phy_id2 = E1000_PHY_ID2_82544x,
1661 .name = "e1000-82545em",
1662 .device_id = E1000_DEV_ID_82545EM_COPPER,
1663 .revision = 0x03,
1664 .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
1668 static const TypeInfo e1000_default_info = {
1669 .name = "e1000",
1670 .parent = "e1000-82540em",
1673 static void e1000_register_types(void)
1675 int i;
1677 type_register_static(&e1000_base_info);
1678 for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1679 const E1000Info *info = &e1000_devices[i];
1680 TypeInfo type_info = {};
1682 type_info.name = info->name;
1683 type_info.parent = TYPE_E1000_BASE;
1684 type_info.class_data = (void *)info;
1685 type_info.class_init = e1000_class_init;
1686 type_info.instance_init = e1000_instance_init;
1688 type_register(&type_info);
1690 type_register_static(&e1000_default_info);
1693 type_init(e1000_register_types)