commit a77db6f7d142541570d8a3f0d2ae1e5e6c58703b Author: k-nakahara <k-nakahara@iij.ad.jp> Date: Tue Mar 31 20:06:30 2015 +0900 if_wm example of MSI-X and pci_intr_distribute() diff --git a/sys/dev/pci/if_wm.c b/sys/dev/pci/if_wm.c index 0bc75fd..58ced52 100644 --- a/sys/dev/pci/if_wm.c +++ b/sys/dev/pci/if_wm.c @@ -154,6 +154,31 @@ int wm_debug = WM_DEBUG_TX | WM_DEBUG_RX | WM_DEBUG_LINK | WM_DEBUG_GMII #define WM_MPSAFE 1 #endif +#ifdef __HAVE_PCI_MSI_MSIX +#if 0 /* off by default */ +#define WM_MSI_MSIX 1 +#endif +#endif + +/* + * This device driver uses separate interrupts for TX, RX and link state. + * The MSI-X vector index of each interrupt is defined below. + */ +#define WM_NINTR 3 +#define WM_TX_INTR_INDEX 0 +#define WM_RX_INTR_INDEX 1 +#define WM_LINK_INTR_INDEX 2 +#define WM_MAX_NINTR WM_NINTR + +/* + * This driver sets the affinity of each interrupt as below (round-robin). + * If there are fewer CPUs than interrupts, this driver uses the same CPU + * for multiple interrupts. + */ +#define WM_TX_INTR_CPUID 0 +#define WM_RX_INTR_CPUID 1 +#define WM_LINK_INTR_CPUID 2 + /* * Transmit descriptor list size. Due to errata, we can only have * 256 hardware descriptors in the ring on < 82544, but we use 4096 @@ -295,7 +320,11 @@ struct wm_softc { int sc_flowflags; /* 802.3x flow control flags */ int sc_align_tweak; - void *sc_ih; /* interrupt cookie */ + void *sc_ihs[WM_MAX_NINTR]; /* + * interrupt cookies. + * legacy and MSI use sc_ihs[0].
+ */ + callout_t sc_tick_ch; /* tick callout */ bool sc_stopping; @@ -401,6 +430,9 @@ struct wm_softc { kmutex_t *sc_tx_lock; /* lock for tx operations */ kmutex_t *sc_rx_lock; /* lock for rx operations */ + + int sc_nintrs; /* number of interrupts */ + pci_intr_handle_t *sc_intrs; /* legacy and MSI use sc_intrs[0] */ }; #define WM_TX_LOCK(_sc) if ((_sc)->sc_tx_lock) mutex_enter((_sc)->sc_tx_lock) @@ -597,6 +629,11 @@ static void wm_linkintr_gmii(struct wm_softc *, uint32_t); static void wm_linkintr_tbi(struct wm_softc *, uint32_t); static void wm_linkintr(struct wm_softc *, uint32_t); static int wm_intr(void *); +#ifdef WM_MSI_MSIX +static int wm_tx_intr(void *); +static int wm_rx_intr(void *); +static int wm_link_intr(void *); +#endif /* * Media related. @@ -1350,7 +1387,9 @@ wm_attach(device_t parent, device_t self, void *aux) prop_dictionary_t dict; struct ifnet *ifp = &sc->sc_ethercom.ec_if; pci_chipset_tag_t pc = pa->pa_pc; +#ifndef WM_MSI_MSIX pci_intr_handle_t ih; +#endif const char *intrstr = NULL; const char *eetype, *xname; bus_space_tag_t memt; @@ -1499,6 +1538,7 @@ wm_attach(device_t parent, device_t self, void *aux) return; } +#ifndef WM_MSI_MSIX /* * Map and establish our interrupt.
*/ @@ -1510,8 +1550,8 @@ wm_attach(device_t parent, device_t self, void *aux) #ifdef WM_MPSAFE pci_intr_setattr(pc, &ih, PCI_INTR_MPSAFE, true); #endif - sc->sc_ih = pci_intr_establish(pc, ih, IPL_NET, wm_intr, sc); - if (sc->sc_ih == NULL) { + sc->sc_ihs[0] = pci_intr_establish(pc, ih, IPL_NET, wm_intr, sc); + if (sc->sc_ihs[0] == NULL) { aprint_error_dev(sc->sc_dev, "unable to establish interrupt"); if (intrstr != NULL) aprint_error(" at %s", intrstr); @@ -1519,6 +1559,157 @@ wm_attach(device_t parent, device_t self, void *aux) return; } aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr); + sc->sc_nintrs = 1; +#else /* WM_MSI_MSIX */ + if (pci_msix_alloc_exact(pa, &sc->sc_intrs, WM_NINTR) == 0) { + /* 1st, try to use MSI-X (one vector each for TX, RX, link) */ + void *vih; + kcpuset_t *affinity; + + kcpuset_create(&affinity, false); + + /* + * for TX + */ + intrstr = pci_intr_string(pc, sc->sc_intrs[WM_TX_INTR_INDEX], + intrbuf, sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[WM_TX_INTR_INDEX], PCI_INTR_MPSAFE, + true); +#endif + vih = pci_intr_establish(pc, sc->sc_intrs[WM_TX_INTR_INDEX], + IPL_NET, wm_tx_intr, sc); + if (vih == NULL) { + aprint_error_dev(sc->sc_dev, + "unable to establish MSI-X(for TX)%s%s\n", + intrstr ? " at " : "", intrstr ?
intrstr : ""); + kcpuset_destroy(affinity); + error = EBUSY; + return; + } + kcpuset_zero(affinity); + /* Round-robin affinity; report it only if it was applied */ + kcpuset_set(affinity, WM_TX_INTR_CPUID % ncpu); + error = pci_intr_distribute(vih, affinity, NULL); + if (error == 0) { + aprint_normal_dev(sc->sc_dev, + "for TX interrupting at %s affinity to %u\n", + intrstr, WM_TX_INTR_CPUID % ncpu); + } else { + aprint_normal_dev(sc->sc_dev, + "for TX interrupting at %s\n", + intrstr); + } + sc->sc_ihs[WM_TX_INTR_INDEX] = vih; + + /* + * for RX + */ + intrstr = pci_intr_string(pc, sc->sc_intrs[WM_RX_INTR_INDEX], + intrbuf, sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[WM_RX_INTR_INDEX], + PCI_INTR_MPSAFE, true); +#endif + vih = pci_intr_establish(pc, sc->sc_intrs[WM_RX_INTR_INDEX], + IPL_NET, wm_rx_intr, sc); + if (vih == NULL) { + aprint_error_dev(sc->sc_dev, + "unable to establish MSI-X(for RX)%s%s\n", + intrstr ? " at " : "", intrstr ? intrstr : ""); + kcpuset_destroy(affinity); + error = EBUSY; + return; + } + kcpuset_zero(affinity); + kcpuset_set(affinity, WM_RX_INTR_CPUID % ncpu); + error = pci_intr_distribute(vih, affinity, NULL); + if (error == 0) { + aprint_normal_dev(sc->sc_dev, + "for RX interrupting at %s affinity to %u\n", + intrstr, WM_RX_INTR_CPUID % ncpu); + } else { + aprint_normal_dev(sc->sc_dev, + "for RX interrupting at %s\n", + intrstr); + } + sc->sc_ihs[WM_RX_INTR_INDEX] = vih; + + /* + * for link state changing + */ + intrstr = pci_intr_string(pc, sc->sc_intrs[WM_LINK_INTR_INDEX], + intrbuf, sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[WM_LINK_INTR_INDEX], + PCI_INTR_MPSAFE, true); +#endif + vih = pci_intr_establish(pc, sc->sc_intrs[WM_LINK_INTR_INDEX], + IPL_NET, wm_link_intr, sc); + if (vih == NULL) { + aprint_error_dev(sc->sc_dev, + "unable to establish MSI-X(for LINK)%s%s\n", + intrstr ? " at " : "", intrstr ?
intrstr : ""); + kcpuset_destroy(affinity); + error = EBUSY; + return; + } + kcpuset_zero(affinity); + kcpuset_set(affinity, WM_LINK_INTR_CPUID % ncpu); + error = pci_intr_distribute(vih, affinity, NULL); + if (error == 0) { + aprint_normal_dev(sc->sc_dev, + "for LINK interrupting at %s affinity to %u\n", + intrstr, WM_LINK_INTR_CPUID % ncpu); + } else { + aprint_normal_dev(sc->sc_dev, + "for LINK interrupting at %s\n", + intrstr); + } + sc->sc_ihs[WM_LINK_INTR_INDEX] = vih; + + sc->sc_nintrs = WM_NINTR; + kcpuset_destroy(affinity); + } else if (pci_msi_alloc_exact(pa, &sc->sc_intrs, 1) == 0) { + /* 2nd, try to use MSI */ + intrstr = pci_intr_string(pc, sc->sc_intrs[0], intrbuf, + sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[0], PCI_INTR_MPSAFE, true); +#endif + sc->sc_ihs[0] = pci_intr_establish(pc, sc->sc_intrs[0], + IPL_NET, wm_intr, sc); + if (sc->sc_ihs[0] == NULL) { + aprint_error_dev(sc->sc_dev, "unable to establish MSI\n"); + error = EBUSY; + return; + } + aprint_normal_dev(sc->sc_dev, "MSI at %s\n", intrstr); + + sc->sc_nintrs = 1; + } else if (pci_intx_alloc(pa, &sc->sc_intrs) == 0) { + /* Last, try to use INTx */ + intrstr = pci_intr_string(pc, sc->sc_intrs[0], intrbuf, + sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[0], PCI_INTR_MPSAFE, true); +#endif + sc->sc_ihs[0] = pci_intr_establish(pc, sc->sc_intrs[0], + IPL_NET, wm_intr, sc); + if (sc->sc_ihs[0] == NULL) { + aprint_error_dev(sc->sc_dev, "unable to establish INTx\n"); + error = EBUSY; + return; + } + aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr); + + sc->sc_nintrs = 1; + } else { + /* MSI-X, MSI and INTx allocation all failed; give up */ + aprint_error_dev(sc->sc_dev, "unable to allocate interrupt\n"); + return; + } +#endif /* WM_MSI_MSIX */ /* * Check the function ID (unit number of the chip).
@@ -2498,10 +2682,15 @@ wm_detach(device_t self, int flags __unused) bus_dmamem_free(sc->sc_dmat, &sc->sc_cd_seg, sc->sc_cd_rseg); /* Disestablish the interrupt handler */ - if (sc->sc_ih != NULL) { - pci_intr_disestablish(sc->sc_pc, sc->sc_ih); - sc->sc_ih = NULL; + for (i = 0; i < sc->sc_nintrs; i++) { + if (sc->sc_ihs[i] != NULL) { + pci_intr_disestablish(sc->sc_pc, sc->sc_ihs[i]); + sc->sc_ihs[i] = NULL; + } } +#ifdef WM_MSI_MSIX + pci_intr_release(sc->sc_pc, sc->sc_intrs, sc->sc_nintrs); +#endif /* WM_MSI_MSIX */ /* Unmap the registers */ if (sc->sc_ss) { @@ -3547,6 +3736,10 @@ wm_reset(struct wm_softc *sc) /* Clear interrupt */ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); + if (sc->sc_nintrs > 1) { + CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU); + CSR_WRITE(sc, WMREG_EIAC, 0); + } /* Stop the transmit and receive processes. */ CSR_WRITE(sc, WMREG_RCTL, 0); @@ -3791,6 +3984,10 @@ wm_reset(struct wm_softc *sc) /* Clear any pending interrupt events. */ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); reg = CSR_READ(sc, WMREG_ICR); + if (sc->sc_nintrs > 1) { + CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU); + CSR_WRITE(sc, WMREG_EIAC, 0); + } /* reload sc_ctrl */ sc->sc_ctrl = CSR_READ(sc, WMREG_CTRL); @@ -4217,11 +4414,44 @@ wm_init_locked(struct ifnet *ifp) reg |= RXCSUM_IPV6OFL | RXCSUM_TUOFL; CSR_WRITE(sc, WMREG_RXCSUM, reg); + /* Set up MSI-X */ + if (sc->sc_nintrs > 1) { + uint32_t ivar; + + CSR_WRITE(sc, WMREG_GPIE, WMREG_GPIE_NSICR | WMREG_GPIE_MSIX_MODE | + WMREG_GPIE_EIAME | WMREG_GPIE_PBA); + + /* TX */ + ivar = CSR_READ(sc, WMREG_IVAR0); + ivar &= 0xFFFF00FF; + ivar |= (WM_TX_INTR_INDEX | WMREG_IVAR_VALID) << 8; + CSR_WRITE(sc, WMREG_IVAR0, ivar); + + /* RX */ + ivar = CSR_READ(sc, WMREG_IVAR0); + ivar &= 0xFFFFFF00; + ivar |= WM_RX_INTR_INDEX | WMREG_IVAR_VALID; + CSR_WRITE(sc, WMREG_IVAR0, ivar); + + /* LINK */ + ivar = (WM_LINK_INTR_INDEX | WMREG_IVAR_VALID) << 8; + CSR_WRITE(sc, WMREG_IVAR_MISC, ivar); + } + /* Set up the interrupt registers. 
*/ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); sc->sc_icr = ICR_TXDW | ICR_LSC | ICR_RXSEQ | ICR_RXDMT0 | ICR_RXO | ICR_RXT0; - CSR_WRITE(sc, WMREG_IMS, sc->sc_icr); + if (sc->sc_nintrs > 1) { + uint32_t mask = (1 << WM_RX_INTR_INDEX) | (1 << WM_TX_INTR_INDEX) | + (1 << WM_LINK_INTR_INDEX); + CSR_WRITE(sc, WMREG_EIAC, mask); + CSR_WRITE(sc, WMREG_EIAM, mask); + CSR_WRITE(sc, WMREG_EIMS, mask); + CSR_WRITE(sc, WMREG_IMS, ICR_LSC); + } else { + CSR_WRITE(sc, WMREG_IMS, sc->sc_icr); + } if ((sc->sc_type == WM_T_ICH8) || (sc->sc_type == WM_T_ICH9) || (sc->sc_type == WM_T_ICH10) || (sc->sc_type == WM_T_PCH) @@ -4429,6 +4659,10 @@ wm_stop_locked(struct ifnet *ifp, int disable) */ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); sc->sc_icr = 0; + if (sc->sc_nintrs > 1) { + CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU); + CSR_WRITE(sc, WMREG_EIAC, 0); + } /* Release any queued transmit buffers. */ for (i = 0; i < WM_TXQUEUELEN(sc); i++) { @@ -6101,6 +6335,145 @@ wm_intr(void *arg) return handled; } +#ifdef WM_MSI_MSIX +/* + * wm_tx_intr: + * + * Interrupt service routine for the MSI-X TX vector. + */ +static int +wm_tx_intr(void *arg) +{ + struct wm_softc *sc = arg; + struct ifnet *ifp = &sc->sc_ethercom.ec_if; + uint32_t icr; + int handled = 0; + + CSR_WRITE(sc, WMREG_EIMC, 1 << WM_TX_INTR_INDEX); + + WM_TX_LOCK(sc); + + if (sc->sc_stopping) + goto out; + + while (1 /* CONSTCOND */) { + icr = CSR_READ(sc, WMREG_ICR); /* XXXX no need? */ + if ((icr & sc->sc_icr) == 0) + break; + rnd_add_uint32(&sc->rnd_source, icr); + + handled = 1; + + +#if defined(WM_DEBUG) || defined(WM_EVENT_COUNTERS) + if (icr & ICR_TXDW) { + DPRINTF(WM_DEBUG_TX, + ("%s: TX: got TXDW interrupt\n", + device_xname(sc->sc_dev))); + WM_EVCNT_INCR(&sc->sc_ev_txdw); + } +#endif + + wm_txintr(sc); + } +out: + WM_TX_UNLOCK(sc); + + CSR_WRITE(sc, WMREG_EIMS, 1 << WM_TX_INTR_INDEX); + + if (handled) { + /* Try to get more packets going. */ + ifp->if_start(ifp); + } + + return handled; +} + +/* + * wm_rx_intr: + * + * Interrupt service routine for the MSI-X RX vector.
+ */ +static int +wm_rx_intr(void *arg) +{ + struct wm_softc *sc = arg; + uint32_t icr; + + CSR_WRITE(sc, WMREG_EIMC, 1 << WM_RX_INTR_INDEX); + WM_RX_LOCK(sc); + + if (sc->sc_stopping) + goto out; + + while (1 /* CONSTCOND */) { + icr = CSR_READ(sc, WMREG_ICR); + if ((icr & sc->sc_icr) == 0) + break; + rnd_add_uint32(&sc->rnd_source, icr); + +#if defined(WM_DEBUG) || defined(WM_EVENT_COUNTERS) + if (icr & (ICR_RXDMT0|ICR_RXT0)) { + DPRINTF(WM_DEBUG_RX, + ("%s: RX: got Rx intr 0x%08x\n", + device_xname(sc->sc_dev), + icr & (ICR_RXDMT0|ICR_RXT0))); + WM_EVCNT_INCR(&sc->sc_ev_rxintr); + } +#endif + wm_rxintr(sc); + + if (icr & ICR_RXO) { +#if defined(WM_DEBUG) + log(LOG_WARNING, "%s: Receive overrun\n", + device_xname(sc->sc_dev)); +#endif /* defined(WM_DEBUG) */ + } + } +out: + WM_RX_UNLOCK(sc); + + CSR_WRITE(sc, WMREG_EIMS, 1 << WM_RX_INTR_INDEX); + + return 1; +} + +/* + * wm_link_intr: + * + * Interrupt service routine for the MSI-X link state vector. + */ +static int +wm_link_intr(void *arg) +{ + struct wm_softc *sc = arg; + uint32_t icr; + + CSR_WRITE(sc, WMREG_EIMC, 1 << WM_LINK_INTR_INDEX); + WM_TX_LOCK(sc); + if (sc->sc_stopping) + goto out; + + while (1 /* CONSTCOND */) { + icr = CSR_READ(sc, WMREG_ICR); + if ((icr & sc->sc_icr) == 0) + break; + rnd_add_uint32(&sc->rnd_source, icr); + + if (icr & (ICR_LSC|ICR_RXSEQ)) { + WM_EVCNT_INCR(&sc->sc_ev_linkintr); + wm_linkintr(sc, icr); + } + } + +out: + WM_TX_UNLOCK(sc); + CSR_WRITE(sc, WMREG_EIMS, 1 << WM_LINK_INTR_INDEX); + + return 1; +} +#endif /* WM_MSI_MSIX */ + /* * Media related. * GMII, SGMII, TBI (and SERDES) diff --git a/sys/dev/pci/if_wmreg.h b/sys/dev/pci/if_wmreg.h index 14169a7..5a245ce 100644 --- a/sys/dev/pci/if_wmreg.h +++ b/sys/dev/pci/if_wmreg.h @@ -438,6 +438,18 @@ struct livengood_tcpip_ctxdesc { #define WMREG_ICS 0x00c8 /* Interrupt Cause Set Register */ /* See ICR bits.
*/ +#define WMREG_IVAL 0x00e4 /* Interrupt Vector Allocation Register; NOTE(review): name may be a typo for IVAR - confirm */ +#define WMREG_IVAR0 0x01700 /* Interrupt Vector Allocation */ +#define WMREG_IVAR_MISC 0x01740 /* IVAR for other causes */ + +#define WMREG_GPIE 0x01514 /* General Purpose Interrupt Enable */ +#define WMREG_GPIE_NSICR 0x00000001 +#define WMREG_GPIE_MSIX_MODE 0x00000010 +#define WMREG_GPIE_EIAME 0x40000000 +#define WMREG_GPIE_PBA 0x80000000 + +#define WMREG_IVAR_VALID 0x80 /* ORed with a vector index in IVAR entries */ + #define WMREG_IMS 0x00d0 /* Interrupt Mask Set Register */ /* See ICR bits. */