/*
 * QEMU e1000 emulation
 *
 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
 * Copyright (c) 2008 Qumranet
 * Based on work done by:
 * Copyright (c) 2007 Dan Aloni
 * Copyright (c) 2004 Antony T Curtis
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
 */

#include "hw.h"
#include "pci.h"
#include "net.h"

#include "e1000_hw.h"

#define DEBUG

#ifdef DEBUG
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_NOTYET,
};
#define DBGBIT(x) (1<<DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

#define DBGOUT(what, fmt, params...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ##params); \
    } while (0)
#else
#define DBGOUT(what, fmt, params...) do {} while (0)
#endif

#define IOPORT_SIZE   0x40
#define PNPMMIO_SIZE  0x20000

/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows and Linux
 *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
 *      appears to perform better than 82540EM, but breaks with Linux 2.6.18
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  Others never tested
 */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?        0xcc2 :
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
                   /* default to E1000_DEV_ID_82540EM */        0xc20
};

typedef struct E1000State_st {
    PCIDevice dev;
    VLANClientState *vc;
    NICInfo *nd;
    int mmio_index;

    uint32_t mac_reg[0x8000];
    uint16_t phy_reg[0x20];
    uint16_t eeprom_data[64];

    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    int check_rxov;
    struct e1000_tx {
        unsigned char header[256];
        unsigned char vlan_header[4];
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;
        unsigned char sum_needed;
        unsigned char vlan_needed;
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;
        char tse;
        int8_t ip;
        int8_t tcp;
        char cptse;     // current packet tse bit
    } tx;

    struct {
        uint32_t val_in;    // shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;
} E1000State;

#define defreg(x)   x = (E1000_##x>>2)
enum {
    defreg(CTRL),   defreg(EECD),   defreg(EERD),   defreg(GPRC),
    defreg(GPTC),   defreg(ICR),    defreg(ICS),    defreg(IMC),
    defreg(IMS),    defreg(LEDCTL), defreg(MANC),   defreg(MDIC),
    defreg(MPC),    defreg(PBA),    defreg(RCTL),   defreg(RDBAH),
    defreg(RDBAL),  defreg(RDH),    defreg(RDLEN),  defreg(RDT),
    defreg(STATUS), defreg(SWSM),   defreg(TCTL),   defreg(TDBAH),
    defreg(TDBAL),  defreg(TDH),    defreg(TDLEN),  defreg(TDT),
    defreg(TORH),   defreg(TORL),   defreg(TOTH),   defreg(TOTL),
    defreg(TPR),    defreg(TPT),    defreg(TXDCTL), defreg(WUFC),
    defreg(RA),     defreg(MTA),    defreg(CRCERRS),defreg(VFTA),
    defreg(VET),
};

enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS] = PHY_R,       [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1] = PHY_R,          [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_CTRL] = PHY_RW,        [PHY_1000T_CTRL] = PHY_RW,
    [PHY_LP_ABILITY] = PHY_R,   [PHY_1000T_STATUS] = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
    [PHY_ID2] = PHY_R,          [M88E1000_PHY_SPEC_STATUS] = PHY_R
};

static void
ioport_map(PCIDevice *pci_dev, int region_num, uint32_t addr,
           uint32_t size, int type)
{
    DBGOUT(IO, "e1000_ioport_map addr=0x%04x size=0x%08x\n", addr, size);
}
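
/*
 * Interrupt model: set_interrupt_cause() latches the cause bits into ICR
 * (adding INT_ASSERTED whenever any cause is pending) and raises or lowers
 * the PCI INTA line depending on whether any cause in ICR is unmasked in IMS.
 */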
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    if (val)
        val |= E1000_ICR_INT_ASSERTED;
    s->mac_reg[ICR] = val;
    qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
}

static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
           s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}

static int
rxbufsize(uint32_t v)
{
    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
    switch (v) {
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
        return 16384;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
        return 8192;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
        return 4096;
    case E1000_RCTL_SZ_1024:
        return 1024;
    case E1000_RCTL_SZ_512:
        return 512;
    case E1000_RCTL_SZ_256:
        return 256;
    }
    return 2048;
}

static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = rxbufsize(val);
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
}
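
/*
 * MDIC writes implement guest access to the emulated PHY: the PHY register
 * address, opcode and data are packed into the MDIC value, reads and writes
 * are checked against phy_regcap, and completion is signalled by setting
 * E1000_MDIC_READY and raising the MDAC interrupt cause.
 */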
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            s->phy_reg[addr] = data;
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
    set_ics(s, 0, E1000_ICR_MDAC);
}
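
/*
 * EECD emulates the Microwire EEPROM serial interface: the guest bit-bangs
 * SK/CS/DI through set_eecd(), a 9-bit opcode+address sequence selects the
 * word to read, and get_eecd() shifts the addressed eeprom_data word out on
 * DO, advancing one bit per falling clock edge.
 */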
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}

static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_SK & (val ^ oldval)))      // no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {               // falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    if (!(val & E1000_EECD_CS)) {               // rising, no CS (EEPROM reset)
        memset(&s->eecd_state, 0, sizeof s->eecd_state);
        return;
    }
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}

static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return 0;
    return (s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r;
}
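
/*
 * putsum() computes a 16-bit Internet checksum over data[css..cse] (or to the
 * end of the packet when cse is 0) and stores it big-endian at offset sloc,
 * as directed by the checksum offload fields of the context descriptor.
 */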
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        cpu_to_be16wu((uint16_t *)(data + sloc),
                      net_checksum_finish(sum));
    }
}

static inline int
vlan_enabled(E1000State *s)
{
    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
}

static inline int
vlan_rx_filter_enabled(E1000State *s)
{
    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
}

static inline int
is_vlan_packet(E1000State *s, const uint8_t *buf)
{
    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
            le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
}

static inline int
is_vlan_txd(uint32_t txd_lower)
{
    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
}
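
/*
 * xmit_seg() sends one frame (or one TSO segment). For TSO it first patches
 * the per-segment headers: IP total length / payload length, IP
 * identification, TCP sequence number, the PSH/FIN flags on all but the last
 * segment, and the pseudo-header length folded into the TCP/UDP checksum
 * seed. It then inserts the requested checksums, inserts the 802.1Q tag if
 * needed, hands the frame to the VLAN client and updates the TX counters.
 */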
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {           // IPv4
            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
                          tp->size - css);
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else                  // IPv6
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            sofar = frames * tp->mss;
            cpu_to_be32wu((uint32_t *)(tp->data+css+4),  // seq
                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;                // PSH, FIN
        } else  // UDP
            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            cpu_to_be16wu(sp, be16_to_cpup(sp) + len);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        memmove(tp->vlan, tp->data, 12);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        qemu_send_packet(s->vc, tp->vlan, tp->size + 4);
    } else
        qemu_send_packet(s->vc, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
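
/*
 * process_tx_desc() interprets one transmit descriptor. Context descriptors
 * only record offload parameters (ipcss/ipcso/ipcse, tucss/tucso/tucse,
 * hdr_len, mss, TSE) for later use; data and legacy descriptors have their
 * buffers copied from guest memory into tp->data. With TSE set the payload
 * is cut into mss-sized segments, the saved header is re-used for each one,
 * and xmit_seg() is called per segment and once more at EOP for the
 * remainder.
 */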
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff, hdr = 0;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    if (dtype == E1000_TXD_CMD_DEXT) {          // context descriptor
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {   // this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else
        // legacy descriptor
        tp->cptse = 0;

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        hdr = tp->hdr_len;
        msh = hdr + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;
            cpu_physical_memory_read(addr, tp->data + tp->size, bytes);
            if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
                memmove(tp->header, tp->data, hdr);
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, hdr);
                tp->size = hdr;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        cpu_physical_memory_read(addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    if (!(tp->tse && tp->cptse && tp->size < hdr))
        xmit_seg(s);
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}

static uint32_t
txdesc_writeback(target_phys_addr_t base, struct e1000_tx_desc *dp)
{
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    cpu_physical_memory_write(base + ((char *)&dp->upper - (char *)dp),
                              (void *)&dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}
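
/*
 * start_xmit() walks the transmit ring from TDH to TDT, processing and
 * writing back each descriptor, and finally raises the accumulated interrupt
 * causes (TXQE, plus TXDW for descriptors that requested status reporting).
 */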
static void
start_xmit(E1000State *s)
{
    target_phys_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = ((uint64_t)s->mac_reg[TDBAH] << 32) + s->mac_reg[TDBAL] +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(base, &desc);

        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
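
/*
 * receive_filter() decides whether an incoming frame is accepted: it applies
 * the VLAN filter table (when enabled), then unicast/multicast promiscuous
 * modes, broadcast acceptance, the perfect-match receive address registers,
 * and finally the multicast table array hash lookup.
 */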
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)                          // promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))        // promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}

static void
e1000_set_link_status(VLANClientState *vc)
{
    E1000State *s = vc->opaque;
    uint32_t old_status = s->mac_reg[STATUS];

    if (vc->link_down)
        s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
    else
        s->mac_reg[STATUS] |= E1000_STATUS_LU;

    if (s->mac_reg[STATUS] != old_status)
        set_ics(s, 0, E1000_ICR_LSC);
}

static int
e1000_can_receive(void *opaque)
{
    E1000State *s = opaque;

    return (s->mac_reg[RCTL] & E1000_RCTL_EN);
}
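
/*
 * e1000_receive() copies an incoming frame into the guest's receive ring:
 * the frame is filtered, a matching VLAN tag is stripped and reported via the
 * descriptor's special field, the data is written to the next descriptor's
 * buffer, and RXT0 (plus RXDMT0 when the ring runs low, or RXO on overrun)
 * is signalled through set_ics().
 */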
static void
e1000_receive(void *opaque, const uint8_t *buf, int size)
{
    E1000State *s = opaque;
    struct e1000_rx_desc desc;
    target_phys_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0, vlan_offset = 0;

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
        return;

    if (size > s->rxbuf_size) {
        DBGOUT(RX, "packet too large for buffers (%d > %d)\n", size,
               s->rxbuf_size);
        return;
    }

    if (!receive_filter(s, buf, size))
        return;

    if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
        memmove((void *)(buf + 4), buf, 12);
        vlan_status = E1000_RXD_STAT_VP;
        vlan_offset = 4;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    size += 4; // for the header
    do {
        if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
            set_ics(s, 0, E1000_ICS_RXO);
            return;
        }
        base = ((uint64_t)s->mac_reg[RDBAH] << 32) + s->mac_reg[RDBAL] +
               sizeof(desc) * s->mac_reg[RDH];
        cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
                                      (void *)(buf + vlan_offset), size);
            desc.length = cpu_to_le16(size);
            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
        } else // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        cpu_physical_memory_write(base, (void *)&desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        s->check_rxov = 1;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return;
        }
    } while (desc.buffer_addr == 0);

    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    n = s->mac_reg[TORL];
    if ((s->mac_reg[TORL] += size) < n)
        s->mac_reg[TORH]++;

    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);
}

static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}

static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}

static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}

static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}

static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
}

static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->check_rxov = 0;
    s->mac_reg[index] = val & 0xffff;
}

static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}

static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}

static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}

static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}

static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}

static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
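
/*
 * MMIO dispatch: the two tables below are indexed by register offset >> 2
 * and give the accessor used for each MAC register; plain registers map to
 * mac_readreg/mac_writereg while registers with side effects use the helpers
 * above.
 */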
#define getreg(x)   [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),    getreg(RCTL),   getreg(TDH),    getreg(TXDCTL),
    getreg(WUFC),   getreg(TDT),    getreg(CTRL),   getreg(LEDCTL),
    getreg(MANC),   getreg(MDIC),   getreg(SWSM),   getreg(STATUS),
    getreg(TORL),   getreg(TOTL),   getreg(IMS),    getreg(TCTL),
    getreg(RDH),    getreg(RDT),    getreg(VET),

    [TOTH] = mac_read_clr8,  [TORH] = mac_read_clr8,  [GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4,  [TPR] = mac_read_clr4,   [TPT] = mac_read_clr4,
    [ICR] = mac_icr_read,    [EECD] = get_eecd,       [EERD] = flash_eerd_read,
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };

#define putreg(x)   [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),    putreg(EERD),   putreg(SWSM),   putreg(WUFC),
    putreg(TDBAL),  putreg(TDBAH),  putreg(TXDCTL), putreg(RDBAH),
    putreg(RDBAL),  putreg(LEDCTL), putreg(CTRL),   putreg(VET),
    [TDLEN] = set_dlen,  [RDLEN] = set_dlen,  [TCTL] = set_tctl,
    [TDT] = set_tctl,    [MDIC] = set_mdic,   [ICS] = set_ics,
    [TDH] = set_16bit,   [RDH] = set_16bit,   [RDT] = set_rdt,
    [IMC] = set_imc,     [IMS] = set_ims,     [ICR] = set_icr,
    [EECD] = set_eecd,   [RCTL] = set_rx_control,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };

static void
e1000_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

#ifdef TARGET_WORDS_BIGENDIAN
    val = bswap32(val);
#endif
    if (index < NWRITEOPS && macreg_writeops[index])
        macreg_writeops[index](s, index, val);
    else if (index < NREADOPS && macreg_readops[index])
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04x\n", index<<2, val);
    else
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08x\n",
               index<<2, val);
}

static void
e1000_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    // emulate hw without byte enables: no RMW
    e1000_mmio_writel(opaque, addr & ~3,
                      (val & 0xffff) << (8*(addr & 3)));
}

static void
e1000_mmio_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    // emulate hw without byte enables: no RMW
    e1000_mmio_writel(opaque, addr & ~3,
                      (val & 0xff) << (8*(addr & 3)));
}

static uint32_t
e1000_mmio_readl(void *opaque, target_phys_addr_t addr)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NREADOPS && macreg_readops[index]) {
        uint32_t val = macreg_readops[index](s, index);
#ifdef TARGET_WORDS_BIGENDIAN
        val = bswap32(val);
#endif
        return val;
    }
    DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
    return 0;
}

static uint32_t
e1000_mmio_readb(void *opaque, target_phys_addr_t addr)
{
    return ((e1000_mmio_readl(opaque, addr & ~3)) >>
            (8 * (addr & 3))) & 0xff;
}

static uint32_t
e1000_mmio_readw(void *opaque, target_phys_addr_t addr)
{
    return ((e1000_mmio_readl(opaque, addr & ~3)) >>
            (8 * (addr & 3))) & 0xffff;
}

static const int mac_regtosave[] = {
    CTRL,   EECD,   EERD,   GPRC,   GPTC,   ICR,    ICS,    IMC,    IMS,
    LEDCTL, MANC,   MDIC,   MPC,    PBA,    RCTL,   RDBAH,  RDBAL,  RDH,
    RDLEN,  RDT,    STATUS, SWSM,   TCTL,   TDBAH,  TDBAL,  TDH,    TDLEN,
    TDT,    TORH,   TORL,   TOTH,   TOTL,   TPR,    TPT,    TXDCTL, WUFC,
    VET,
};
enum { MAC_NSAVE = ARRAY_SIZE(mac_regtosave) };

static const struct {
    int size;
    int array0;
} mac_regarraystosave[] = { {32, RA}, {128, MTA}, {128, VFTA} };
enum { MAC_NARRAYS = ARRAY_SIZE(mac_regarraystosave) };
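
/*
 * Savevm support: nic_save()/nic_load() serialize the PCI config space, the
 * EEPROM shift-register state, the in-progress TX context, and the EEPROM,
 * PHY and MAC register contents listed in mac_regtosave/mac_regarraystosave,
 * in a fixed field order (savevm version 2).
 */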
static void
nic_save(QEMUFile *f, void *opaque)
{
    E1000State *s = (E1000State *)opaque;
    int i, j;

    pci_device_save(&s->dev, f);
    qemu_put_be32(f, 0);
    qemu_put_be32s(f, &s->rxbuf_size);
    qemu_put_be32s(f, &s->rxbuf_min_shift);
    qemu_put_be32s(f, &s->eecd_state.val_in);
    qemu_put_be16s(f, &s->eecd_state.bitnum_in);
    qemu_put_be16s(f, &s->eecd_state.bitnum_out);
    qemu_put_be16s(f, &s->eecd_state.reading);
    qemu_put_be32s(f, &s->eecd_state.old_eecd);
    qemu_put_8s(f, &s->tx.ipcss);
    qemu_put_8s(f, &s->tx.ipcso);
    qemu_put_be16s(f, &s->tx.ipcse);
    qemu_put_8s(f, &s->tx.tucss);
    qemu_put_8s(f, &s->tx.tucso);
    qemu_put_be16s(f, &s->tx.tucse);
    qemu_put_be32s(f, &s->tx.paylen);
    qemu_put_8s(f, &s->tx.hdr_len);
    qemu_put_be16s(f, &s->tx.mss);
    qemu_put_be16s(f, &s->tx.size);
    qemu_put_be16s(f, &s->tx.tso_frames);
    qemu_put_8s(f, &s->tx.sum_needed);
    qemu_put_s8s(f, &s->tx.ip);
    qemu_put_s8s(f, &s->tx.tcp);
    qemu_put_buffer(f, s->tx.header, sizeof s->tx.header);
    qemu_put_buffer(f, s->tx.data, sizeof s->tx.data);
    for (i = 0; i < 64; i++)
        qemu_put_be16s(f, s->eeprom_data + i);
    for (i = 0; i < 0x20; i++)
        qemu_put_be16s(f, s->phy_reg + i);
    for (i = 0; i < MAC_NSAVE; i++)
        qemu_put_be32s(f, s->mac_reg + mac_regtosave[i]);
    for (i = 0; i < MAC_NARRAYS; i++)
        for (j = 0; j < mac_regarraystosave[i].size; j++)
            qemu_put_be32s(f,
                           s->mac_reg + mac_regarraystosave[i].array0 + j);
}

static int
nic_load(QEMUFile *f, void *opaque, int version_id)
{
    E1000State *s = (E1000State *)opaque;
    int i, j, ret;

    if ((ret = pci_device_load(&s->dev, f)) < 0)
        return ret;
    if (version_id == 1)
        qemu_get_sbe32s(f, &i); /* once some unused instance id */
    qemu_get_be32(f); /* Ignored.  Was mmio_base. */
    qemu_get_be32s(f, &s->rxbuf_size);
    qemu_get_be32s(f, &s->rxbuf_min_shift);
    qemu_get_be32s(f, &s->eecd_state.val_in);
    qemu_get_be16s(f, &s->eecd_state.bitnum_in);
    qemu_get_be16s(f, &s->eecd_state.bitnum_out);
    qemu_get_be16s(f, &s->eecd_state.reading);
    qemu_get_be32s(f, &s->eecd_state.old_eecd);
    qemu_get_8s(f, &s->tx.ipcss);
    qemu_get_8s(f, &s->tx.ipcso);
    qemu_get_be16s(f, &s->tx.ipcse);
    qemu_get_8s(f, &s->tx.tucss);
    qemu_get_8s(f, &s->tx.tucso);
    qemu_get_be16s(f, &s->tx.tucse);
    qemu_get_be32s(f, &s->tx.paylen);
    qemu_get_8s(f, &s->tx.hdr_len);
    qemu_get_be16s(f, &s->tx.mss);
    qemu_get_be16s(f, &s->tx.size);
    qemu_get_be16s(f, &s->tx.tso_frames);
    qemu_get_8s(f, &s->tx.sum_needed);
    qemu_get_s8s(f, &s->tx.ip);
    qemu_get_s8s(f, &s->tx.tcp);
    qemu_get_buffer(f, s->tx.header, sizeof s->tx.header);
    qemu_get_buffer(f, s->tx.data, sizeof s->tx.data);
    for (i = 0; i < 64; i++)
        qemu_get_be16s(f, s->eeprom_data + i);
    for (i = 0; i < 0x20; i++)
        qemu_get_be16s(f, s->phy_reg + i);
    for (i = 0; i < MAC_NSAVE; i++)
        qemu_get_be32s(f, s->mac_reg + mac_regtosave[i]);
    for (i = 0; i < MAC_NARRAYS; i++)
        for (j = 0; j < mac_regarraystosave[i].size; j++)
            qemu_get_be32s(f,
                           s->mac_reg + mac_regarraystosave[i].array0 + j);
    return 0;
}

static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048, 0x1000, 0x00d8, 0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b, 0x0984, 0x0000, 0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff, 0x4d01, 0xffff, 0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000,
};

static const uint16_t phy_reg_init[] = {
    [PHY_CTRL] = 0x1140,                    [PHY_STATUS] = 0x796d, // link initially up
    [PHY_ID1] = 0x141,                      [PHY_ID2] = PHY_ID2_INIT,
    [PHY_1000T_CTRL] = 0x0e00,              [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,  [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,               [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
};

static const uint32_t mac_reg_init[] = {
    [PBA] =     0x00100030,
    [LEDCTL] =  0x602,
    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};

/* PCI interface */

static CPUWriteMemoryFunc *e1000_mmio_write[] = {
    e1000_mmio_writeb, e1000_mmio_writew, e1000_mmio_writel
};

static CPUReadMemoryFunc *e1000_mmio_read[] = {
    e1000_mmio_readb, e1000_mmio_readw, e1000_mmio_readl
};
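
/*
 * e1000_mmio_map() registers the MMIO BAR and enables coalesced MMIO for the
 * register ranges between the entries of excluded_regs; registers with read
 * or write side effects (MDIC, ICR/ICS/IMS/IMC, TCTL, TDT) stay excluded so
 * their accesses are handled immediately rather than batched.
 */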
static void
e1000_mmio_map(PCIDevice *pci_dev, int region_num,
               uint32_t addr, uint32_t size, int type)
{
    E1000State *d = (E1000State *)pci_dev;
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    DBGOUT(MMIO, "e1000_mmio_map addr=0x%08x 0x%08x\n", addr, size);

    cpu_register_physical_memory(addr, PNPMMIO_SIZE, d->mmio_index);
    qemu_register_coalesced_mmio(addr, excluded_regs[0]);

    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        qemu_register_coalesced_mmio(addr + excluded_regs[i] + 4,
                                     excluded_regs[i + 1] -
                                     excluded_regs[i] - 4);
}

static int
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = (E1000State *) dev;

    cpu_unregister_io_memory(d->mmio_index);

    return 0;
}
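
/*
 * pci_e1000_init() creates the device: it fills in the PCI config header,
 * registers the MMIO and I/O port BARs, builds the EEPROM image from the
 * template (patching in the MAC address and recomputing the checksum word),
 * resets the PHY and MAC registers to their initial values, and hooks the
 * device up as a VLAN client and savevm entry.
 */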
PCIDevice *
pci_e1000_init(PCIBus *bus, NICInfo *nd, int devfn)
{
    E1000State *d;
    uint8_t *pci_conf;
    uint16_t checksum = 0;
    static const char info_str[] = "e1000";
    int i;

    d = (E1000State *)pci_register_device(bus, "e1000",
                                          sizeof(E1000State), devfn, NULL, NULL);

    pci_conf = d->dev.config;
    memset(pci_conf, 0, 256);

    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
    pci_config_set_device_id(pci_conf, E1000_DEVID);
    *(uint16_t *)(pci_conf+0x04) = cpu_to_le16(0x0407);
    *(uint16_t *)(pci_conf+0x06) = cpu_to_le16(0x0010);
    pci_conf[0x08] = 0x03;
    pci_config_set_class(pci_conf, PCI_CLASS_NETWORK_ETHERNET);
    pci_conf[0x0c] = 0x10;

    pci_conf[0x3d] = 1; // interrupt pin 0

    d->mmio_index = cpu_register_io_memory(0, e1000_mmio_read,
                                           e1000_mmio_write, d);

    pci_register_io_region((PCIDevice *)d, 0, PNPMMIO_SIZE,
                           PCI_ADDRESS_SPACE_MEM, e1000_mmio_map);

    pci_register_io_region((PCIDevice *)d, 1, IOPORT_SIZE,
                           PCI_ADDRESS_SPACE_IO, ioport_map);

    d->nd = nd;
    memmove(d->eeprom_data, e1000_eeprom_template,
            sizeof e1000_eeprom_template);
    for (i = 0; i < 3; i++)
        d->eeprom_data[i] = (nd->macaddr[2*i+1]<<8) | nd->macaddr[2*i];
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
        checksum += d->eeprom_data[i];
    checksum = (uint16_t) EEPROM_SUM - checksum;
    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;

    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    d->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
                                 e1000_receive, e1000_can_receive, d);
    d->vc->link_status_changed = e1000_set_link_status;

    qemu_format_nic_info_str(d->vc, d->nd->macaddr);

    register_savevm(info_str, -1, 2, nic_save, nic_load, d);
    d->dev.unregister = pci_e1000_uninit;

    return (PCIDevice *)d;
}