ide: make all commands go through cmd_done
[qemu/ar7.git] / hw / net / e1000.c
blob0fc29a0ae38b2eb7c542032136991337d1bee62c
1 /*
2 * QEMU e1000 emulation
4 * Software developer's manual:
5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8 * Copyright (c) 2008 Qumranet
9 * Based on work done by:
10 * Copyright (c) 2007 Dan Aloni
11 * Copyright (c) 2004 Antony T Curtis
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
37 #include "e1000_regs.h"
39 #define E1000_DEBUG
41 #ifdef E1000_DEBUG
42 enum {
43 DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
44 DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
45 DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
46 DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
48 #define DBGBIT(x) (1<<DEBUG_##x)
49 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
51 #define DBGOUT(what, fmt, ...) do { \
52 if (debugflags & DBGBIT(what)) \
53 fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
54 } while (0)
55 #else
56 #define DBGOUT(what, fmt, ...) do {} while (0)
57 #endif
59 #define IOPORT_SIZE 0x40
60 #define PNPMMIO_SIZE 0x20000
61 #define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */
63 /* this is the size past which hardware will drop packets when setting LPE=0 */
64 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
65 /* this is the size past which hardware will drop packets when setting LPE=1 */
66 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
68 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
71 * HW models:
72 * E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
73 * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
74 * E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
75 * Others never tested
78 typedef struct E1000State_st {
79 /*< private >*/
80 PCIDevice parent_obj;
81 /*< public >*/
83 NICState *nic;
84 NICConf conf;
85 MemoryRegion mmio;
86 MemoryRegion io;
88 uint32_t mac_reg[0x8000];
89 uint16_t phy_reg[0x20];
90 uint16_t eeprom_data[64];
92 uint32_t rxbuf_size;
93 uint32_t rxbuf_min_shift;
94 struct e1000_tx {
95 unsigned char header[256];
96 unsigned char vlan_header[4];
97 /* Fields vlan and data must not be reordered or separated. */
98 unsigned char vlan[4];
99 unsigned char data[0x10000];
100 uint16_t size;
101 unsigned char sum_needed;
102 unsigned char vlan_needed;
103 uint8_t ipcss;
104 uint8_t ipcso;
105 uint16_t ipcse;
106 uint8_t tucss;
107 uint8_t tucso;
108 uint16_t tucse;
109 uint8_t hdr_len;
110 uint16_t mss;
111 uint32_t paylen;
112 uint16_t tso_frames;
113 char tse;
114 int8_t ip;
115 int8_t tcp;
116 char cptse; // current packet tse bit
117 } tx;
119 struct {
120 uint32_t val_in; // shifted in from guest driver
121 uint16_t bitnum_in;
122 uint16_t bitnum_out;
123 uint16_t reading;
124 uint32_t old_eecd;
125 } eecd_state;
127 QEMUTimer *autoneg_timer;
129 QEMUTimer *mit_timer; /* Mitigation timer. */
130 bool mit_timer_on; /* Mitigation timer is running. */
131 bool mit_irq_level; /* Tracks interrupt pin level. */
132 uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */
134 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
135 #define E1000_FLAG_AUTONEG_BIT 0
136 #define E1000_FLAG_MIT_BIT 1
137 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
138 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
139 uint32_t compat_flags;
140 } E1000State;
142 typedef struct E1000BaseClass {
143 PCIDeviceClass parent_class;
144 uint16_t phy_id2;
145 } E1000BaseClass;
147 #define TYPE_E1000_BASE "e1000-base"
149 #define E1000(obj) \
150 OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
152 #define E1000_DEVICE_CLASS(klass) \
153 OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
154 #define E1000_DEVICE_GET_CLASS(obj) \
155 OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
157 #define defreg(x) x = (E1000_##x>>2)
158 enum {
159 defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
160 defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
161 defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
162 defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH),
163 defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT),
164 defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
165 defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
166 defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
167 defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
168 defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
169 defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
170 defreg(ITR),
173 static void
174 e1000_link_down(E1000State *s)
176 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
177 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
178 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
179 s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
182 static void
183 e1000_link_up(E1000State *s)
185 s->mac_reg[STATUS] |= E1000_STATUS_LU;
186 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
189 static void
190 set_phy_ctrl(E1000State *s, int index, uint16_t val)
193 * QEMU 1.3 does not support link auto-negotiation emulation, so if we
194 * migrate during auto negotiation, after migration the link will be
195 * down.
197 if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
198 return;
200 if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
201 e1000_link_down(s);
202 DBGOUT(PHY, "Start link auto negotiation\n");
203 timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
207 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
208 [PHY_CTRL] = set_phy_ctrl,
211 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
213 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
214 static const char phy_regcap[0x20] = {
215 [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
216 [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
217 [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW,
218 [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R,
219 [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
220 [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R,
221 [PHY_AUTONEG_EXP] = PHY_R,
224 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
225 static const uint16_t phy_reg_init[] = {
226 [PHY_CTRL] = 0x1140,
227 [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
228 [PHY_ID1] = 0x141, /* [PHY_ID2] configured per DevId, from e1000_reset() */
229 [PHY_1000T_CTRL] = 0x0e00, [M88E1000_PHY_SPEC_CTRL] = 0x360,
230 [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60, [PHY_AUTONEG_ADV] = 0xde1,
231 [PHY_LP_ABILITY] = 0x1e0, [PHY_1000T_STATUS] = 0x3c00,
232 [M88E1000_PHY_SPEC_STATUS] = 0xac00,
235 static const uint32_t mac_reg_init[] = {
236 [PBA] = 0x00100030,
237 [LEDCTL] = 0x602,
238 [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
239 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
240 [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
241 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
242 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
243 E1000_STATUS_LU,
244 [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
245 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
246 E1000_MANC_RMCP_EN,
/*
 * Fold a candidate mitigation delay into *curr, keeping the smallest
 * non-zero value.  *curr == 0 means "not set yet"; a value of 0 is
 * skipped (that mitigation source is disabled).
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
/*
 * Latch a new interrupt cause value into ICR/ICS and drive the PCI
 * interrupt line, applying the partial interrupt-mitigation emulation
 * (ITR/RADV/TADV) on a rising edge of the pending-interrupt state.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge.  We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR).  RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            /* Inside the window: e1000_mit_timer re-evaluates on expiry. */
            return;
        }
        if (s->compat_flags & E1000_FLAG_MIT) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            /* TADV applies only when a TX interrupt is pending and the
             * guest requested delay via the IDE bit on a descriptor. */
            if (s->mit_ide &&
                (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            /* RADV applies only when RDTR is non-zero (RADV enable). */
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                s->mit_timer_on = 1;
                /* mit_delay is in 256ns units per the ITR definition. */
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
320 static void
321 e1000_mit_timer(void *opaque)
323 E1000State *s = opaque;
325 s->mit_timer_on = 0;
326 /* Call set_interrupt_cause to update the irq level (if necessary). */
327 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
330 static void
331 set_ics(E1000State *s, int index, uint32_t val)
333 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
334 s->mac_reg[IMS]);
335 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
338 static void
339 e1000_autoneg_timer(void *opaque)
341 E1000State *s = opaque;
342 if (!qemu_get_queue(s->nic)->link_down) {
343 e1000_link_up(s);
344 s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
345 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
346 DBGOUT(PHY, "Auto negotiation is completed\n");
347 set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
351 static int
352 rxbufsize(uint32_t v)
354 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
355 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
356 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
357 switch (v) {
358 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
359 return 16384;
360 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
361 return 8192;
362 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
363 return 4096;
364 case E1000_RCTL_SZ_1024:
365 return 1024;
366 case E1000_RCTL_SZ_512:
367 return 512;
368 case E1000_RCTL_SZ_256:
369 return 256;
371 return 2048;
/*
 * Full device reset (registered as a qemu reset handler): cancel the
 * autoneg/mitigation timers, restore PHY and MAC registers to power-on
 * defaults, clear in-flight TX state, and seed RAL/RAH from the
 * configured MAC address.
 */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    /* PHY_ID2 differs per emulated device model (see E1000BaseClass). */
    d->phy_reg[PHY_ID2] = edc->phy_id2;
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    /* Defaults claim link-up; undo that if the backend link is down. */
    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        /* MAC bytes are packed little-endian into RAL (4) and RAH (2). */
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}
408 static void
409 set_ctrl(E1000State *s, int index, uint32_t val)
411 /* RST is self clearing */
412 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
/*
 * RCTL write: cache the derived RX parameters (buffer size and the
 * free-descriptor-threshold shift) and kick the RX queue, since a
 * newly enabled or resized receiver may now accept queued packets.
 */
static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = rxbufsize(val);
    /* RDMTS selects the low-watermark as RDLEN >> (field + 1). */
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
/*
 * MDIC write: the guest's MDIO access to the (single) emulated PHY.
 * Decodes a read or write of phy_reg[addr], flags an error for any
 * PHY number other than 1 or for registers not granted in phy_regcap,
 * then latches the result with READY set so the guest can poll for
 * completion.
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* Replace the DATA field with the register contents. */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* Run the side-effect hook (e.g. PHY_CTRL autoneg) first. */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            }
            s->phy_reg[addr] = data;
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        /* Guest asked for an MDI-access-complete interrupt. */
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
/*
 * EECD read: output side of the bit-banged Microwire EEPROM interface.
 * DO carries the current output bit of eeprom_data once a read opcode
 * has been shifted in (see set_eecd); before that it idles high.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* bitnum_out >> 4 selects the 16-bit word; (bitnum_out & 0xf) ^ 0xf
     * selects bits MSB-first within that word. */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
/*
 * EECD write: input side of the bit-banged Microwire EEPROM interface.
 * Tracks CS and SK edges, shifts DI bits into val_in on rising SK, and
 * once 9 bits (start + 2-bit opcode + 6-bit address) have arrived,
 * arms the output pointer for a read (consumed by get_eecd).
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))        // CS inactive; nothing to do
        return;
    if (E1000_EECD_CS & (val ^ oldval)) {    // CS rise edge; reset state
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))   // no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {            // falling edge
        s->eecd_state.bitnum_out++;          // advance to the next output bit
        return;
    }
    /* Rising SK edge: clock one DI bit into the input shift register. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* Full command received: low 6 bits are the word address
         * (×16 bits, minus one because the falling edge pre-increments);
         * bits 6..8 must be the Microwire READ opcode. */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
508 static uint32_t
509 flash_eerd_read(E1000State *s, int x)
511 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
513 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
514 return (s->mac_reg[EERD]);
516 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
517 return (E1000_EEPROM_RW_REG_DONE | r);
519 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
520 E1000_EEPROM_RW_REG_DONE | r);
523 static void
524 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
526 uint32_t sum;
528 if (cse && cse < n)
529 n = cse + 1;
530 if (sloc < n-1) {
531 sum = net_checksum_add(n-css, data+css);
532 stw_be_p(data + sloc, net_checksum_finish(sum));
536 static inline int
537 vlan_enabled(E1000State *s)
539 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
542 static inline int
543 vlan_rx_filter_enabled(E1000State *s)
545 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
548 static inline int
549 is_vlan_packet(E1000State *s, const uint8_t *buf)
551 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
552 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
555 static inline int
556 is_vlan_txd(uint32_t txd_lower)
558 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
561 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
562 * fill it in, just pad descriptor length by 4 bytes unless guest
563 * told us to strip it off the packet. */
564 static inline int
565 fcs_len(E1000State *s)
567 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
570 static void
571 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
573 NetClientState *nc = qemu_get_queue(s->nic);
574 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
575 nc->info->receive(nc, buf, size);
576 } else {
577 qemu_send_packet(nc, buf, size);
/*
 * Emit one segment of the frame staged in s->tx: for TSO segments,
 * patch the IP total-length/ID and TCP sequence/flags/pseudo-header
 * fields first; then fill in requested checksums, prepend the VLAN tag
 * if needed, transmit, and bump the TX statistics counters.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        /* TSO segment: fix up the per-segment IP and TCP/UDP headers. */
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {      // IPv4
            /* total length, and IP ID incremented per segment */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else // IPv6
            /* payload length field */
            stw_be_p(tp->data+css+4, tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* advance the TCP sequence number by the payload sent so far */
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;    // PSH, FIN: only on last segment
        } else // UDP
            stw_be_p(tp->data+css+4, len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff); /* fold the carry */
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    /* Insert TCP/UDP and/or IP checksums as requested by POPTS. */
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /* Splice the 4-byte 802.1Q tag in after the MAC addresses;
         * relies on tx.vlan immediately preceding tx.data in memory. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    /* 64-bit total-octets counter: carry from TOTL into TOTH. */
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
/*
 * Consume one TX descriptor.  Context descriptors latch offload
 * parameters into s->tx and return; data/legacy descriptors append
 * their buffer to tx.data — segmenting on the fly when TSO is active —
 * and transmit the accumulated frame when EOP is seen.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    /* Remember IDE so set_interrupt_cause can apply TADV mitigation. */
    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    // context descriptor
        /* Latch checksum/TSO offload parameters for later descriptors. */
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {    // this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            /* POPTS is only honoured on the first buffer of a packet. */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        /* Stash the tag (VET ethertype + descriptor 'special' field);
         * xmit_seg splices it into the frame. */
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        stw_be_p(tp->vlan_header + 2,
                 le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* TSO: fill tx.data up to header + one MSS, emit a segment,
         * then restore the saved header and continue. */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                /* The header just became complete; save a copy for
                 * reuse by every subsequent segment. */
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        /* Plain (non-TSO) append, clipped to the staging buffer. */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* End of packet: flush, unless a TSO frame ended mid-header. */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
/*
 * Write back descriptor status to guest memory if the descriptor asked
 * for a report (RS/RPS): set DD, clear the error bits, and DMA only the
 * 'upper' dword back.  Returns the interrupt cause to accumulate
 * (TXDW), or 0 when no write-back was requested.
 */
static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    /* Only the status dword is written back, at its offset in the ring. */
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}
749 static uint64_t tx_desc_base(E1000State *s)
751 uint64_t bah = s->mac_reg[TDBAH];
752 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
754 return (bah << 32) + bal;
/*
 * Drain the TX ring: walk descriptors from TDH to TDT, process and
 * write back each one, then raise the accumulated interrupt causes.
 * Guards against a TDH wraparound caused by bogus guest TDT/TDLEN.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* advance TDH, wrapping at the end of the ring */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
/*
 * RX address filtering.  Returns nonzero when the frame should be
 * accepted, checking in order: the VLAN filter table (VFTA), the
 * promiscuous bits (UPE/MPE), broadcast acceptance (BAM), the perfect
 * unicast match registers (RAL/RAH), and finally the 4096-bit
 * multicast hash table (MTA).
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        /* VLAN id at offset 14; one VFTA bit per id. */
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)          // promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))        // promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    /* Perfect match against each valid RAL/RAH pair (AV bit set). */
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    /* MO selects which 12 address bits index the multicast hash. */
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
851 static bool
852 have_autoneg(E1000State *s)
854 return (s->compat_flags & E1000_FLAG_AUTONEG) &&
855 (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN) &&
856 (s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG);
/*
 * Backend link state changed: mirror it into the emulated MAC/PHY.
 * On link-up with autoneg emulation enabled, delay the transition via
 * the autoneg timer; interrupt the guest (LSC) on any STATUS change.
 */
static void
e1000_set_link_status(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    uint32_t old_status = s->mac_reg[STATUS];

    if (nc->link_down) {
        e1000_link_down(s);
    } else {
        if (have_autoneg(s) &&
            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
            /* emulate auto-negotiation if supported */
            timer_mod(s->autoneg_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
        } else {
            e1000_link_up(s);
        }
    }

    if (s->mac_reg[STATUS] != old_status)
        set_ics(s, 0, E1000_ICR_LSC);
}
/*
 * Do we have enough free RX descriptors to store total_size bytes?
 * Descriptors between RDH and RDT (with ring wraparound) are free;
 * RDH == RDT means the ring is empty of usable buffers.
 */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        /* RDT wrapped behind RDH: count across the ring boundary. */
        bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
               s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
900 static int
901 e1000_can_receive(NetClientState *nc)
903 E1000State *s = qemu_get_nic_opaque(nc);
905 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
906 (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
909 static uint64_t rx_desc_base(E1000State *s)
911 uint64_t bah = s->mac_reg[RDBAH];
912 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
914 return (bah << 32) + bal;
/*
 * Scatter-gather receive path.  Pads runts, applies the size and
 * address filters, strips the VLAN tag when enabled, then DMAs the
 * payload into the guest's RX descriptor ring and raises RXT0 (and
 * RXDMT0 when the ring drops below its low-watermark).
 * Returns the consumed size, or -1 to have the packet queued/retried.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;     /* flat view of the header */
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        /* Silently dropped: claim the bytes so it is not retried. */
        return size;
    }

    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        /* Strip the 4-byte tag: save the VLAN id for the descriptor,
         * then shift the MAC addresses forward over the tag. */
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                             + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
        /* Ring full: overrun interrupt, packet stays queued. */
        set_ics(s, 0, E1000_ICS_RXO);
        return -1;
    }
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                /* Copy up to one rx buffer's worth from the iovec,
                 * crossing iov element boundaries as needed. */
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     */
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
    if (n < s->mac_reg[TORL])
        s->mac_reg[TORH]++;
    s->mac_reg[TORL] = n;

    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
/* Flat-buffer receive entry point: wraps the buffer in a single-element
 * iovec and delegates to e1000_receive_iov(), the real RX path. */
static ssize_t
e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    const struct iovec iov = {
        .iov_base = (uint8_t *)buf,  /* cast away const for struct iovec */
        .iov_len = size
    };

    return e1000_receive_iov(nc, &iov, 1);
}
/* Plain MAC register read: return the backing array slot unchanged. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
/* ICR read handler: the Interrupt Cause Read register is read-to-clear,
 * so after returning the current value all pending causes are dropped
 * (and the IRQ line deasserted) via set_interrupt_cause(s, 0, 0). */
static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}
/* Read-to-clear handler for 32-bit statistics registers (GPRC, TPR, ...):
 * return the current count and reset it to zero. */
static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}
1108 static uint32_t
1109 mac_read_clr8(E1000State *s, int index)
1111 uint32_t ret = s->mac_reg[index];
1113 s->mac_reg[index] = 0;
1114 s->mac_reg[index-1] = 0;
1115 return ret;
/* Plain MAC register write.  Writing the high dword of Receive Address 0
 * (RA + 1) completes a MAC-address update, so refresh the NIC info string
 * shown in the monitor with the new little-endian address bytes. */
static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    uint32_t macaddr[2];

    s->mac_reg[index] = val;

    if (index == RA + 1) {
        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
    }
}
/* RDT (Receive Descriptor Tail) write: the register is 16 bits wide.
 * Advancing the tail may make RX descriptors available again, so kick
 * the net layer to retry any packets queued while we were out of buffers. */
static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}
/* Generic write handler for registers that are only 16 bits wide
 * (TDH, RDH, RDTR, RADV, TADV, ITR): mask off the upper bits. */
static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}
/* Descriptor-ring length write (TDLEN/RDLEN): hardware requires the
 * length to be a multiple of 128 bytes, hence the 0xfff80 mask. */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
/* Write handler shared by TCTL and TDT (see macreg_writeops): store the
 * value, clamp the 16-bit transmit tail, and start transmitting any
 * descriptors now pending between TDH and TDT. */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
/* ICR write: writing a 1 to a bit clears that interrupt cause, so the
 * new cause set is the old one with the written bits masked out. */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
/* IMC (Interrupt Mask Clear) write: disable the written interrupt bits,
 * then re-evaluate the IRQ line against the narrowed mask. */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
/* IMS (Interrupt Mask Set) write: enable the written interrupt bits,
 * then re-evaluate the IRQ line (a newly unmasked pending cause fires). */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
/* Dispatch table for MMIO register reads, indexed by (offset >> 2).
 * Most registers use plain mac_readreg; statistics registers get the
 * read-to-clear handlers and ICR/EECD/EERD have special semantics.
 * A NULL slot means the register is not implemented for reading. */
#define getreg(x)    [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),

    [TOTH] = mac_read_clr8,    [TORH] = mac_read_clr8,    [GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4,    [TPR] = mac_read_clr4,     [TPT] = mac_read_clr4,
    [ICR] = mac_icr_read,      [EECD] = get_eecd,         [EERD] = flash_eerd_read,
    /* whole register ranges readable as plain values */
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
/* Dispatch table for MMIO register writes, indexed by (offset >> 2).
 * Registers with side effects (ring tails, interrupt masks, control
 * registers) get dedicated handlers; NULL slots are write-ignored
 * (logged as read-only or unknown in e1000_mmio_write). */
#define putreg(x)    [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),
    [TDLEN] = set_dlen,   [RDLEN] = set_dlen,   [TCTL] = set_tctl,
    [TDT] = set_tctl,     [MDIC] = set_mdic,    [ICS] = set_ics,
    [TDH] = set_16bit,    [RDH] = set_16bit,    [RDT] = set_rdt,
    [IMC] = set_imc,      [IMS] = set_ims,      [ICR] = set_icr,
    [EECD] = set_eecd,    [RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit,   [RADV] = set_16bit,   [TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
/* MMIO write entry point: map the byte offset to a dword register index
 * and dispatch through macreg_writeops.  Writes to registers that are
 * readable but have no write handler are treated as read-only (logged);
 * anything else is an unknown register (also just logged). */
static void
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NWRITEOPS && macreg_writeops[index]) {
        macreg_writeops[index](s, index, val);
    } else if (index < NREADOPS && macreg_readops[index]) {
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
               index<<2, val);
    }
}
1239 static uint64_t
1240 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1242 E1000State *s = opaque;
1243 unsigned int index = (addr & 0x1ffff) >> 2;
1245 if (index < NREADOPS && macreg_readops[index])
1247 return macreg_readops[index](s, index);
1249 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1250 return 0;
/* Memory-region callbacks for the register BAR: all accesses are
 * performed as little-endian 32-bit operations internally. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
/* I/O-port BAR read: not implemented, always returns 0.
 * The (void)s cast silences the unused-variable warning. */
static uint64_t e1000_io_read(void *opaque, hwaddr addr,
                              unsigned size)
{
    E1000State *s = opaque;

    (void)s;
    return 0;
}
/* I/O-port BAR write: not implemented, silently ignored. */
static void e1000_io_write(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
{
    E1000State *s = opaque;

    (void)s;
}
/* Memory-region callbacks for the (stub) I/O-port BAR. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
/* VMState field-exists predicate: true only for version-1 migration
 * streams (used to skip fields dropped after v1). */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
/* Migration pre-save hook: fold transient timer state into the
 * registers/PHY bits that actually get migrated. */
static void e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }
}
/* Migration post-load hook: reconstruct state that is not (or cannot be)
 * migrated directly — mitigation timer state and the net-layer link
 * status.  Always returns 0 (success). */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* With interrupt mitigation disabled (compat machines), force the
     * mitigation registers back to their "off" values. */
    if (!(s->compat_flags & E1000_FLAG_MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}
/* Subsection predicate: only migrate mitigation state when the
 * mitigation compat flag is enabled for this machine type. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return s->compat_flags & E1000_FLAG_MIT;
}
/* Optional migration subsection carrying interrupt-mitigation state
 * (sent only when e1000_mit_state_needed() is true). */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
/* Main migration description.  NOTE: field order here defines the wire
 * format — never reorder or remove entries; additions go at the end or
 * in versioned/optional subsections. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM bit-bang engine state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* in-flight TX context (checksum offload / TSO) */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* individual MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* receive-address, multicast and VLAN filter tables */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_init().
 * Words 0-2 (the MAC address) and the checksum word are also filled
 * in at init time; see pci_e1000_init().
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1462 /* PCI interface */
/* Create the two BAR memory regions.  MMIO is coalesced for speed,
 * except around registers with immediate side effects (MDIC, the
 * interrupt registers, TCTL/TDT) which must trap synchronously; the
 * excluded_regs list is terminated by the PNPMMIO_SIZE sentinel. */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    /* coalesce each gap between consecutive excluded registers */
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
/* Net-client cleanup callback: the NIC backend is going away, so drop
 * our reference to it (the NICState itself is freed by the net layer). */
static void
e1000_cleanup(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    s->nic = NULL;
}
/* PCI unrealize: stop and free both timers, destroy the BAR regions,
 * and unregister the NIC from the net layer. */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}
/* Net-layer callbacks wiring this device into QEMU's network core. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};
/* PCI realize: set up config space, BARs, EEPROM contents (MAC address,
 * device id, checksum), the NIC backend and the two device timers.
 * Returns 0 on success. */
static int pci_e1000_init(PCIDevice *pci_dev)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
    uint8_t *pci_conf;
    uint16_t checksum = 0;
    int i;
    uint8_t *macaddr;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    /* Seed the emulated EEPROM from the template, then patch in the
     * per-instance MAC address (words 0-2, little-endian byte pairs),
     * the device id (words 11 and 13) and the checksum word. */
    memmove(d->eeprom_data, e1000_eeprom_template,
            sizeof e1000_eeprom_template);
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;
    for (i = 0; i < 3; i++)
        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
    d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
        checksum += d->eeprom_data[i];
    checksum = (uint16_t) EEPROM_SUM - checksum;
    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);

    return 0;
}
/* qdev reset callback: forward to the device-level reset routine. */
static void qdev_e1000_reset(DeviceState *dev)
{
    E1000State *d = E1000(dev);
    e1000_reset(d);
}
/* User-configurable properties.  Both compat bits default to on;
 * machine-type compat code turns them off for older machines. */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
/* Per-variant identification data used as QOM class_data to stamp out
 * the concrete e1000 device models (see e1000_devices[]). */
typedef struct E1000Info {
    const char *name;       /* QOM type name */
    uint16_t   device_id;   /* PCI device id */
    uint8_t    revision;    /* PCI revision id */
    uint16_t   phy_id2;     /* PHY identifier register 2 value */
} E1000Info;
/* Class init shared by all variants: fill PCI/device class fields from
 * the per-variant E1000Info passed as class data. */
static void e1000_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
    E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
    const E1000Info *info = data;

    k->init = pci_e1000_init;
    k->exit = pci_e1000_uninit;
    k->romfile = "efi-e1000.rom";
    k->vendor_id = PCI_VENDOR_ID_INTEL;
    k->device_id = info->device_id;
    k->revision = info->revision;
    e->phy_id2 = info->phy_id2;
    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    dc->desc = "Intel Gigabit Ethernet";
    dc->reset = qdev_e1000_reset;
    dc->vmsd = &vmstate_e1000;
    dc->props = e1000_properties;
}
/* Abstract base type; concrete variants are registered dynamically in
 * e1000_register_types() from e1000_devices[]. */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
};
/* Supported 8254x variants; one QOM type is registered per entry. */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000-82540em",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
/* Back-compat alias: plain "e1000" is the 82540EM variant. */
static const TypeInfo e1000_default_info = {
    .name   = "e1000",
    .parent = "e1000-82540em",
};
/* Register the abstract base type, one concrete type per entry of
 * e1000_devices[], and the "e1000" alias type. */
static void e1000_register_types(void)
{
    int i;

    type_register_static(&e1000_base_info);
    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
        const E1000Info *info = &e1000_devices[i];
        TypeInfo type_info = {};

        type_info.name = info->name;
        type_info.parent = TYPE_E1000_BASE;
        type_info.class_data = (void *)info;
        type_info.class_init = e1000_class_init;

        type_register(&type_info);
    }
    type_register_static(&e1000_default_info);
}

type_init(e1000_register_types)