? if_sk.c.new
? if_wm-example.diff
? o
Index: if_wm.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_wm.c,v
retrieving revision 1.320
diff -u -u -r1.320 if_wm.c
--- if_wm.c	4 May 2015 10:10:42 -0000	1.320
+++ if_wm.c	12 May 2015 14:34:08 -0000
@@ -154,6 +154,33 @@
 #define WM_MPSAFE	1
 #endif
 
+#ifdef __HAVE_PCI_MSI_MSIX
+#if 0 /* off by default */
+#define WM_MSI_MSIX	1
+#endif
+#endif
+
+/*
+ * In MSI-X mode this driver binds each interrupt to the CPU listed below
+ * (round-robin).  If there are fewer CPUs than interrupts, the driver uses
+ * the same CPU for multiple interrupts.
+ */
+#define WM_TX_INTR_CPUID	0
+#define WM_RX_INTR_CPUID	1
+#define WM_LINK_INTR_CPUID	2
+
+/*
+ * In MSI-X mode this driver splits its interrupt into TX, RX and link
+ * state vectors, using the MSI-X vector indexes below.  These are defined
+ * even without WM_MSI_MSIX because struct wm_softc sizes sc_ihs[] with
+ * WM_MAX_NINTR in every configuration.
+ */
+#define WM_NINTR		3
+#define WM_TX_INTR_INDEX	0
+#define WM_RX_INTR_INDEX	1
+#define WM_LINK_INTR_INDEX	2
+#define WM_MAX_NINTR		WM_NINTR
+
 /*
  * Transmit descriptor list size.  Due to errata, we can only have
  * 256 hardware descriptors in the ring on < 82544, but we use 4096
@@ -295,7 +322,12 @@
 	int sc_flowflags;		/* 802.3x flow control flags */
 	int sc_align_tweak;
 
-	void *sc_ih;			/* interrupt cookie */
+	pci_intr_handle_t sc_ih;	/* handle storage for legacy */
+	void *sc_ihs[WM_MAX_NINTR];	/*
+					 * interrupt cookies.
+					 * legacy and msi use sc_ihs[0].
+					 */
+
 	callout_t sc_tick_ch;		/* tick callout */
 	bool sc_stopping;
 
@@ -401,6 +433,9 @@
 
 	kmutex_t *sc_tx_lock;		/* lock for tx operations */
 	kmutex_t *sc_rx_lock;		/* lock for rx operations */
+
+	int sc_nintrs;			/* number of interrupts */
+	pci_intr_handle_t *sc_intrs;	/* legacy and msi use sc_intrs[0] */
 };
 
 #define WM_TX_LOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_enter((_sc)->sc_tx_lock)
@@ -597,6 +632,25 @@
 static void	wm_linkintr_tbi(struct wm_softc *, uint32_t);
 static void	wm_linkintr(struct wm_softc *, uint32_t);
 static int	wm_intr(void *);
+#ifdef WM_MSI_MSIX
+static int	wm_tx_intr(void *);
+static int	wm_rx_intr(void *);
+static int	wm_link_intr(void *);
+
+/*
+ * Handler, target CPU and name of each MSI-X vector, indexed by vector.
+ * Kept after the handler prototypes so the initializers can name them.
+ */
+static const struct {
+	int		(*msix_intr)(void *);
+	int		msix_cpuid;
+	const char	*msix_name;
+} wm_msix_info[] = {
+	[WM_TX_INTR_INDEX] = { wm_tx_intr, WM_TX_INTR_CPUID, "TX" },
+	[WM_RX_INTR_INDEX] = { wm_rx_intr, WM_RX_INTR_CPUID, "RX" },
+	[WM_LINK_INTR_INDEX] = { wm_link_intr, WM_LINK_INTR_CPUID, "LINK" },
+};
+#endif
 
 /*
  * Media related.
@@ -1341,6 +1395,91 @@
 	return 0;
 }
 
+#ifdef WM_MSI_MSIX
+/*
+ * wm_msix_setup:
+ *
+ *	Establish the handler for MSI-X vector i (an index into
+ *	wm_msix_info[] and sc->sc_intrs[]) and bind it to its CPU.
+ *	On success the cookie is stored in sc->sc_ihs[i]; if the
+ *	handler cannot be established, an error is printed and
+ *	sc->sc_ihs[i] is not written.
+ */
+static void
+wm_msix_setup(struct wm_softc *sc, size_t i)
+{
+	const char *intrstr;
+	char intrbuf[PCI_INTRSTR_LEN];
+	const char *name = wm_msix_info[i].msix_name;
+	pci_chipset_tag_t pc = sc->sc_pc;
+	void *vih;
+	kcpuset_t *affinity;
+	unsigned int cpuid;
+	int error;
+
+	intrstr = pci_intr_string(pc, sc->sc_intrs[i],
+	    intrbuf, sizeof(intrbuf));
+#ifdef WM_MPSAFE
+	pci_intr_setattr(pc, &sc->sc_intrs[i], PCI_INTR_MPSAFE, true);
+#endif
+	vih = pci_intr_establish(pc, sc->sc_intrs[i], IPL_NET,
+	    wm_msix_info[i].msix_intr, sc);
+	if (vih == NULL) {
+		aprint_error_dev(sc->sc_dev,
+		    "unable to establish MSI-X(for %s)%s%s\n", name,
+		    intrstr ? " at " : "", intrstr ? intrstr : "");
+		return;
+	}
+	sc->sc_ihs[i] = vih;
+
+	/* Round-robin affinity */
+	cpuid = wm_msix_info[i].msix_cpuid % ncpu;
+	kcpuset_create(&affinity, false);
+	kcpuset_zero(affinity);
+	kcpuset_set(affinity, cpuid);
+	error = pci_intr_distribute(vih, affinity, NULL);
+	/* Report the affinity only if it was actually applied. */
+	if (error == 0) {
+		aprint_normal_dev(sc->sc_dev,
+		    "for %s interrupting at %s affinity to %u\n", name,
+		    intrstr, cpuid);
+	} else {
+		aprint_normal_dev(sc->sc_dev,
+		    "for %s interrupting at %s\n", name, intrstr);
+	}
+	kcpuset_destroy(affinity);
+}
+#endif
+
+/*
+ * wm_alloc_interrupts:
+ *
+ *	Allocate the interrupt handle(s) into sc->sc_intrs.  With
+ *	WM_MSI_MSIX, try MSI-X (WM_NINTR vectors), then MSI, then INTx;
+ *	otherwise, or if all of those fail, map the legacy interrupt into
+ *	sc->sc_ih.  Returns the number of handles, or -1 on failure.
+ */
+static int
+wm_alloc_interrupts(struct pci_attach_args *pa, struct wm_softc *sc)
+{
+#ifdef WM_MSI_MSIX
+	if (pci_msix_alloc_exact(pa, &sc->sc_intrs, WM_NINTR) == 0)
+		return WM_NINTR;
+	if (pci_msi_alloc_exact(pa, &sc->sc_intrs, 1) == 0 ||
+	    pci_intx_alloc(pa, &sc->sc_intrs) == 0)
+		return 1;
+#endif
+	/*
+	 * XXX: Should make a function that allocates for legacy so we don't
+	 * need the spare space, or worry about freeing that?  And where is
+	 * the free code for the above alloc functions?
+	 */
+	if (pci_intr_map(pa, &sc->sc_ih) != 0)
+		return -1;
+	sc->sc_intrs = &sc->sc_ih;
+	return 1;
+}
+
 /* The attach function (ca_attach) */
 static void
 wm_attach(device_t parent, device_t self, void *aux)
@@ -1350,7 +1489,6 @@
 	prop_dictionary_t dict;
 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
 	pci_chipset_tag_t pc = pa->pa_pc;
-	pci_intr_handle_t ih;
 	const char *intrstr = NULL;
 	const char *eetype, *xname;
 	bus_space_tag_t memt;
@@ -1500,25 +1638,38 @@
 	}
 
 	/*
-	 * Map and establish our interrupt.
+	 * Map and establish our interrupt(s).
 	 */
-	if (pci_intr_map(pa, &ih)) {
+	switch (sc->sc_nintrs = wm_alloc_interrupts(pa, sc)) {
+	case -1:
 		aprint_error_dev(sc->sc_dev, "unable to map interrupt\n");
 		return;
-	}
-	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));
+	case 1:
+		intrstr = pci_intr_string(pc, sc->sc_intrs[0], intrbuf,
+		    sizeof(intrbuf));
 #ifdef WM_MPSAFE
-	pci_intr_setattr(pc, &ih, PCI_INTR_MPSAFE, true);
+		pci_intr_setattr(pc, &sc->sc_intrs[0], PCI_INTR_MPSAFE, true);
+#endif
+		sc->sc_ihs[0] = pci_intr_establish(pc, sc->sc_intrs[0],
+		    IPL_NET, wm_intr, sc);
+		if (sc->sc_ihs[0] == NULL) {
+			aprint_error_dev(sc->sc_dev,
+			    "unable to establish interrupt");
+			if (intrstr != NULL)
+				aprint_error(" at %s", intrstr);
+			aprint_error("\n");
+			return;
+		}
+		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
+		break;
+#ifdef WM_MSI_MSIX
+	default:
+		/* MSI-X */
+		for (size_t i = 0; i < __arraycount(wm_msix_info); i++)
+			wm_msix_setup(sc, i);
+		break;
 #endif
-	sc->sc_ih = pci_intr_establish(pc, ih, IPL_NET, wm_intr, sc);
-	if (sc->sc_ih == NULL) {
-		aprint_error_dev(sc->sc_dev, "unable to establish interrupt");
-		if (intrstr != NULL)
-			aprint_error(" at %s", intrstr);
-		aprint_error("\n");
-		return;
 	}
-	aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
 
 	/*
 	 * Check the function ID (unit number of the chip).
@@ -2498,10 +2622,17 @@
 	bus_dmamem_free(sc->sc_dmat, &sc->sc_cd_seg, sc->sc_cd_rseg);
 
 	/* Disestablish the interrupt handler */
-	if (sc->sc_ih != NULL) {
-		pci_intr_disestablish(sc->sc_pc, sc->sc_ih);
-		sc->sc_ih = NULL;
+	for (i = 0; i < sc->sc_nintrs; i++) {
+		if (sc->sc_ihs[i] != NULL) {
+			pci_intr_disestablish(sc->sc_pc, sc->sc_ihs[i]);
+			sc->sc_ihs[i] = NULL;
+		}
 	}
+#ifdef WM_MSI_MSIX
+	/* A failed attach leaves sc_nintrs at -1: nothing to release. */
+	if (sc->sc_nintrs > 0)
+		pci_intr_release(sc->sc_pc, sc->sc_intrs, sc->sc_nintrs);
+#endif /* WM_MSI_MSIX */
 
 	/* Unmap the registers */
 	if (sc->sc_ss) {
@@ -3547,6 +3678,10 @@
 
 	/* Clear interrupt */
 	CSR_WRITE(sc, WMREG_IMC, 0xffffffffU);
+	if (sc->sc_nintrs > 1) {
+		CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU);
+		CSR_WRITE(sc, WMREG_EIAC, 0);
+	}
 
 	/* Stop the transmit and receive processes. */
 	CSR_WRITE(sc, WMREG_RCTL, 0);
@@ -3791,6 +3926,10 @@
 	/* Clear any pending interrupt events. */
 	CSR_WRITE(sc, WMREG_IMC, 0xffffffffU);
 	reg = CSR_READ(sc, WMREG_ICR);
+	if (sc->sc_nintrs > 1) {
+		CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU);
+		CSR_WRITE(sc, WMREG_EIAC, 0);
+	}
 
 	/* reload sc_ctrl */
 	sc->sc_ctrl = CSR_READ(sc, WMREG_CTRL);
@@ -4217,11 +4356,44 @@
 		reg |= RXCSUM_IPV6OFL | RXCSUM_TUOFL;
 	CSR_WRITE(sc, WMREG_RXCSUM, reg);
 
+	/* Set up MSI-X */
+	if (sc->sc_nintrs > 1) {
+		uint32_t ivar;
+
+		CSR_WRITE(sc, WMREG_GPIE, WMREG_GPIE_NSICR |
+		    WMREG_GPIE_MSIX_MODE | WMREG_GPIE_EIAME | WMREG_GPIE_PBA);
+
+		/* TX: vector goes in bits 15:8 of IVAR0 */
+		ivar = CSR_READ(sc, WMREG_IVAR0);
+		ivar &= 0xFFFF00FF;
+		ivar |= (WM_TX_INTR_INDEX | WMREG_IVAR_VALID) << 8;
+		CSR_WRITE(sc, WMREG_IVAR0, ivar);
+
+		/* RX: vector goes in bits 7:0 of IVAR0 */
+		ivar = CSR_READ(sc, WMREG_IVAR0);
+		ivar &= 0xFFFFFF00;
+		ivar |= WM_RX_INTR_INDEX | WMREG_IVAR_VALID;
+		CSR_WRITE(sc, WMREG_IVAR0, ivar);
+
+		/* LINK: vector goes in bits 15:8 of IVAR_MISC */
+		ivar = (WM_LINK_INTR_INDEX | WMREG_IVAR_VALID) << 8;
+		CSR_WRITE(sc, WMREG_IVAR_MISC, ivar);
+	}
+
 	/* Set up the interrupt registers. */
 	CSR_WRITE(sc, WMREG_IMC, 0xffffffffU);
 	sc->sc_icr = ICR_TXDW | ICR_LSC | ICR_RXSEQ | ICR_RXDMT0 |
 	    ICR_RXO | ICR_RXT0;
-	CSR_WRITE(sc, WMREG_IMS, sc->sc_icr);
+	if (sc->sc_nintrs > 1) {
+		uint32_t mask = (1 << WM_RX_INTR_INDEX) |
+		    (1 << WM_TX_INTR_INDEX) | (1 << WM_LINK_INTR_INDEX);
+		CSR_WRITE(sc, WMREG_EIAC, mask);
+		CSR_WRITE(sc, WMREG_EIAM, mask);
+		CSR_WRITE(sc, WMREG_EIMS, mask);
+		CSR_WRITE(sc, WMREG_IMS, ICR_LSC);
+	} else {
+		CSR_WRITE(sc, WMREG_IMS, sc->sc_icr);
+	}
 
 	if ((sc->sc_type == WM_T_ICH8) || (sc->sc_type == WM_T_ICH9)
 	    || (sc->sc_type == WM_T_ICH10) || (sc->sc_type == WM_T_PCH)
@@ -4429,6 +4601,10 @@
 	 */
 	CSR_WRITE(sc, WMREG_IMC, 0xffffffffU);
 	sc->sc_icr = 0;
+	if (sc->sc_nintrs > 1) {
+		CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU);
+		CSR_WRITE(sc, WMREG_EIAC, 0);
+	}
 
 	/* Release any queued transmit buffers. */
 	for (i = 0; i < WM_TXQUEUELEN(sc); i++) {
@@ -6101,6 +6277,151 @@
 	return handled;
 }
 
+#ifdef WM_MSI_MSIX
+/*
+ * wm_tx_intr:
+ *
+ *	Interrupt service routine for the TX MSI-X vector.
+ *	The vector is masked via EIMC on entry and unmasked via EIMS
+ *	on exit.  Returns nonzero if a cause enabled in sc_icr was
+ *	pending.
+ */
+static int
+wm_tx_intr(void *arg)
+{
+	struct wm_softc *sc = arg;
+	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
+	uint32_t icr;
+	int handled = 0;
+
+	CSR_WRITE(sc, WMREG_EIMC, 1 << WM_TX_INTR_INDEX);
+
+	WM_TX_LOCK(sc);
+
+	if (sc->sc_stopping)
+		goto out;
+
+	while (1 /* CONSTCOND */) {
+		icr = CSR_READ(sc, WMREG_ICR); /* XXXX no need? */
+		if ((icr & sc->sc_icr) == 0)
+			break;
+		rnd_add_uint32(&sc->rnd_source, icr);
+
+		handled = 1;
+
+#if defined(WM_DEBUG) || defined(WM_EVENT_COUNTERS)
+		if (icr & ICR_TXDW) {
+			DPRINTF(WM_DEBUG_TX,
+			    ("%s: TX: got TXDW interrupt\n",
+			    device_xname(sc->sc_dev)));
+			WM_EVCNT_INCR(&sc->sc_ev_txdw);
+		}
+#endif
+
+		wm_txintr(sc);
+	}
+out:
+	WM_TX_UNLOCK(sc);
+
+	CSR_WRITE(sc, WMREG_EIMS, 1 << WM_TX_INTR_INDEX);
+
+	if (handled) {
+		/* Try to get more packets going. */
+		ifp->if_start(ifp);
+	}
+
+	return handled;
+}
+
+/*
+ * wm_rx_intr:
+ *
+ *	Interrupt service routine for the RX MSI-X vector.
+ *	The vector is masked via EIMC on entry and unmasked via EIMS
+ *	on exit.  Always returns 1.
+ */
+static int
+wm_rx_intr(void *arg)
+{
+	struct wm_softc *sc = arg;
+	uint32_t icr;
+
+	CSR_WRITE(sc, WMREG_EIMC, 1 << WM_RX_INTR_INDEX);
+	WM_RX_LOCK(sc);
+
+	if (sc->sc_stopping)
+		goto out;
+
+	while (1 /* CONSTCOND */) {
+		icr = CSR_READ(sc, WMREG_ICR);
+		if ((icr & sc->sc_icr) == 0)
+			break;
+		rnd_add_uint32(&sc->rnd_source, icr);
+
+#if defined(WM_DEBUG) || defined(WM_EVENT_COUNTERS)
+		if (icr & (ICR_RXDMT0|ICR_RXT0)) {
+			DPRINTF(WM_DEBUG_RX,
+			    ("%s: RX: got Rx intr 0x%08x\n",
+			    device_xname(sc->sc_dev),
+			    icr & (ICR_RXDMT0|ICR_RXT0)));
+			WM_EVCNT_INCR(&sc->sc_ev_rxintr);
+		}
+#endif
+		wm_rxintr(sc);
+
+		if (icr & ICR_RXO) {
+#if defined(WM_DEBUG)
+			log(LOG_WARNING, "%s: Receive overrun\n",
+			    device_xname(sc->sc_dev));
+#endif /* defined(WM_DEBUG) */
+		}
+	}
+out:
+	WM_RX_UNLOCK(sc);
+
+	CSR_WRITE(sc, WMREG_EIMS, 1 << WM_RX_INTR_INDEX);
+
+	return 1;
+}
+
+/*
+ * wm_link_intr:
+ *
+ *	Interrupt service routine for the link MSI-X vector.
+ *	Link causes (ICR_LSC, ICR_RXSEQ) are passed to wm_linkintr()
+ *	with the TX lock held.  Always returns 1.
+ */
+static int
+wm_link_intr(void *arg)
+{
+	struct wm_softc *sc = arg;
+	uint32_t icr;
+
+	CSR_WRITE(sc, WMREG_EIMC, 1 << WM_LINK_INTR_INDEX);
+	WM_TX_LOCK(sc);
+	if (sc->sc_stopping)
+		goto out;
+
+	while (1 /* CONSTCOND */) {
+		icr = CSR_READ(sc, WMREG_ICR);
+		if ((icr & sc->sc_icr) == 0)
+			break;
+		rnd_add_uint32(&sc->rnd_source, icr);
+
+		if (icr & (ICR_LSC|ICR_RXSEQ)) {
+			WM_EVCNT_INCR(&sc->sc_ev_linkintr);
+			wm_linkintr(sc, icr);
+		}
+	}
+
+out:
+	WM_TX_UNLOCK(sc);
+	CSR_WRITE(sc, WMREG_EIMS, 1 << WM_LINK_INTR_INDEX);
+
+	return 1;
+}
+#endif /* WM_MSI_MSIX */
+
 /*
  * Media related.
  * GMII, SGMII, TBI (and SERDES)
Index: if_wmreg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_wmreg.h,v
retrieving revision 1.69
diff -u -u -r1.69 if_wmreg.h
--- if_wmreg.h	4 May 2015 10:10:42 -0000	1.69
+++ if_wmreg.h	12 May 2015 14:34:08 -0000
@@ -438,6 +438,18 @@
 #define	WMREG_ICS	0x00c8	/* Interrupt Cause Set Register */
 	/* See ICR bits. */
 
+#define WMREG_IVAL	0x00e4	/* Interrupt Vector Allocation Register */
+#define WMREG_IVAR0	0x01700	/* Interrupt Vector Allocation */
+#define WMREG_IVAR_MISC	0x01740	/* IVAR for other causes */
+
+#define WMREG_GPIE	0x01514	/* General Purpose Interrupt Enable */
+#define WMREG_GPIE_NSICR	0x00000001
+#define WMREG_GPIE_MSIX_MODE	0x00000010
+#define WMREG_GPIE_EIAME	0x40000000
+#define WMREG_GPIE_PBA	0x80000000
+
+#define WMREG_IVAR_VALID	0x80
+
 #define	WMREG_IMS	0x00d0	/* Interrupt Mask Set Register */
 	/* See ICR bits. */
 