diff --git a/doc/TODO.smpnet b/doc/TODO.smpnet new file mode 100644 index 0000000..f6090b0 --- /dev/null +++ b/doc/TODO.smpnet @@ -0,0 +1,28 @@ +$NetBSD$ + +Non MP-safe components +====================== + + - bpf + - To be listed more... + +bpf +=== + +MP-ification of bpf requires all of bpf_mtap* are called in normal LWP context +or softint context, i.e., not in hardware interrupt context. For Tx, all +bpf_mtap satisfy the requirement. For Rx, most of bpf_mtap are called in softint. +Unfortunately some bpf_mtap on Rx are still called in hardware interrupt context. + +This is the list of the functions that have such bpf_mtap: + + - sca_frame_process() @ sys/dev/ic/hd64570.c + - en_intr() @ sys/dev/ic/midway.c + - rxintr_cleanup() and txintr_cleanup() @ sys/dev/pci/if_lmc.c + - ipr_rx_data_rdy() @ sys/netisdn/i4b_ipr.c + +Ideally we should make the functions run in softint somehow, but we don't have +actual devices, no time (or interest/love) to work on the task, so instead we +provide a deferred bpf_mtap mechanism that forcibly runs bpf_mtap in softint +context. It's a workaround and once the functions run in softint, we should use +the original bpf_mtap again. 
diff --git a/sys/dev/ic/hd64570.c b/sys/dev/ic/hd64570.c index 86e7465..b4eedc4 100644 --- a/sys/dev/ic/hd64570.c +++ b/sys/dev/ic/hd64570.c @@ -457,6 +457,7 @@ sca_port_attach(struct sca_softc *sc, u_int port) if_attach(ifp); if_alloc_sadl(ifp); bpf_attach(ifp, DLT_HDLC, HDLC_HDRLEN); + bpf_mtap_softint_init(ifp); if (sc->sc_parent == NULL) printf("%s: port %d\n", ifp->if_xname, port); @@ -1574,7 +1575,7 @@ sca_frame_process(sca_port_t *scp) return; } - bpf_mtap(&scp->sp_if, m); /* XXX not in softint */ + bpf_mtap_softint(&scp->sp_if, m); scp->sp_if.if_ipackets++; diff --git a/sys/dev/ic/midway.c b/sys/dev/ic/midway.c index d37ec1e..719ff33 100644 --- a/sys/dev/ic/midway.c +++ b/sys/dev/ic/midway.c @@ -2766,7 +2766,7 @@ EN_INTR_TYPE en_intr(void *arg) ifp->if_ipackets++; #endif - bpf_mtap(ifp, m); /* XXX not in softint */ + bpf_mtap_softint(ifp, m); atm_input(ifp, &ah, m, sc->rxslot[slot].rxhand); } @@ -3623,6 +3623,7 @@ en_pvcattach(struct ifnet *ifp) LIST_INSERT_HEAD(&sc->sif_list, (struct pvcsif *)pvc_ifp, sif_links); if_attach(pvc_ifp); atm_ifattach(pvc_ifp); + bpf_mtap_softint_init(pvc_ifp); #ifdef ATM_PVCEXT rrp_add(sc, pvc_ifp); diff --git a/sys/dev/pci/if_lmc.c b/sys/dev/pci/if_lmc.c index 039f6b9..e2f0d1f 100644 --- a/sys/dev/pci/if_lmc.c +++ b/sys/dev/pci/if_lmc.c @@ -4294,7 +4294,7 @@ rxintr_cleanup(softc_t *sc) sc->status.cntrs.ipackets++; /* Berkeley Packet Filter */ - LMC_BPF_MTAP(sc, first_mbuf); /* XXX not in softint */ + LMC_BPF_MTAP(sc, first_mbuf); /* Give this good packet to the network stacks. 
*/ sc->quota--; @@ -4446,7 +4446,7 @@ txintr_cleanup(softc_t *sc) sc->status.cntrs.opackets++; /* Berkeley Packet Filter */ - LMC_BPF_MTAP(sc, m); /* XXX not in softint */ + LMC_BPF_MTAP(sc, m); } m_freem(m); diff --git a/sys/dev/pci/if_lmc.h b/sys/dev/pci/if_lmc.h index 8261dba..62432c1 100644 --- a/sys/dev/pci/if_lmc.h +++ b/sys/dev/pci/if_lmc.h @@ -984,8 +984,12 @@ typedef int intr_return_t; # define SLEEP(usecs) tsleep(sc, PZERO, DEVICE_NAME, 1+(usecs/tick)) # define DMA_SYNC(map, size, flags) bus_dmamap_sync(ring->tag, map, 0, size, flags) # define DMA_LOAD(map, addr, size) bus_dmamap_load(ring->tag, map, addr, size, 0, BUS_DMA_NOWAIT) -# define LMC_BPF_MTAP(sc, mbuf) bpf_mtap((sc)->ifp, mbuf) -# define LMC_BPF_ATTACH(sc, dlt, len) bpf_attach((sc)->ifp, dlt, len) +# define LMC_BPF_MTAP(sc, mbuf) bpf_mtap_softint((sc)->ifp, mbuf) +# define LMC_BPF_ATTACH(sc, dlt, len) \ + do { \ + bpf_attach((sc)->ifp, dlt, len); \ + bpf_mtap_softint_init((sc)->ifp); \ + } while (0) # define LMC_BPF_DETACH(sc) bpf_detach((sc)->ifp) static int driver_announced = 0; /* print driver info once only */ diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 74dc59e..4ade664 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -45,6 +45,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.203 2016/07/19 02:47:45 pgoyette Exp $"); #include "opt_bpf.h" #include "sl.h" #include "strip.h" +#include "opt_net_mpsafe.h" #endif #include @@ -60,6 +61,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.203 2016/07/19 02:47:45 pgoyette Exp $"); #include #include #include +#include #include #include @@ -73,6 +75,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.203 2016/07/19 02:47:45 pgoyette Exp $"); #include #include #include +#include #include #include @@ -1593,6 +1596,92 @@ _bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m) m_freem(m); } +static struct mbuf * +bpf_mbuf_enqueue(struct bpf_if *bp, struct mbuf *m) +{ + struct mbuf *dup; + + dup = m_dup(m, 0, M_COPYALL, M_NOWAIT); + if (dup == NULL) + return NULL; 
+ + if (bp->bif_mbuf_tail != NULL) { + bp->bif_mbuf_tail->m_nextpkt = dup; + } else { + bp->bif_mbuf_head = dup; + } + bp->bif_mbuf_tail = dup; +#ifdef BPF_MTAP_SOFTINT_DEBUG + log(LOG_DEBUG, "%s: enqueued mbuf=%p to %s\n", + __func__, dup, bp->bif_ifp->if_xname); +#endif + + return dup; +} + +static struct mbuf * +bpf_mbuf_dequeue(struct bpf_if *bp) +{ + struct mbuf *m; + int s; + + s = splnet(); + m = bp->bif_mbuf_head; + if (m != NULL) { + bp->bif_mbuf_head = m->m_nextpkt; + m->m_nextpkt = NULL; + + if (bp->bif_mbuf_head == NULL) + bp->bif_mbuf_tail = NULL; +#ifdef BPF_MTAP_SOFTINT_DEBUG + log(LOG_DEBUG, "%s: dequeued mbuf=%p from %s\n", + __func__, m, bp->bif_ifp->if_xname); +#endif + } + splx(s); + + return m; +} + +static void +bpf_mtap_si(void *arg) +{ + struct bpf_if *bp = arg; + struct mbuf *m; + + while ((m = bpf_mbuf_dequeue(bp)) != NULL) { +#ifdef BPF_MTAP_SOFTINT_DEBUG + log(LOG_DEBUG, "%s: tapping mbuf=%p on %s\n", + __func__, m, bp->bif_ifp->if_xname); +#endif +#ifndef NET_MPSAFE + KERNEL_LOCK(1, NULL); +#endif + bpf_ops->bpf_mtap(bp, m); +#ifndef NET_MPSAFE + KERNEL_UNLOCK_ONE(NULL); +#endif + m_freem(m); + } +} + +void +bpf_mtap_softint(struct ifnet *ifp, struct mbuf *m) +{ + struct bpf_if *bp = ifp->if_bpf; + struct mbuf *dup; + + KASSERT(cpu_intr_p()); + + if (bp == NULL || bp->bif_dlist == NULL) + return; + KASSERT(bp->bif_si != NULL); + + dup = bpf_mbuf_enqueue(bp, m); + if (dup != NULL) + softint_schedule(bp->bif_si); +} + static int bpf_hdrlen(struct bpf_d *d) { @@ -1789,6 +1878,7 @@ _bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) bp->bif_driverp = driverp; bp->bif_ifp = ifp; bp->bif_dlt = dlt; + bp->bif_si = NULL; bp->bif_next = bpf_iflist; bpf_iflist = bp; @@ -1802,6 +1892,29 @@ _bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) #endif } +void +bpf_mtap_softint_init(struct ifnet *ifp) +{ + struct bpf_if *bp; + + mutex_enter(&bpf_mtx); + for (bp = bpf_iflist; bp != NULL; bp = 
bp->bif_next) { + if (bp->bif_ifp != ifp) + continue; + + bp->bif_mbuf_head = NULL; + bp->bif_mbuf_tail = NULL; + bp->bif_si = softint_establish(SOFTINT_NET, bpf_mtap_si, bp); + if (bp->bif_si == NULL) + panic("%s: softint_establish() failed", __func__); + break; + } + mutex_exit(&bpf_mtx); + + if (bp == NULL) + panic("%s: no bpf_if found for %s", __func__, ifp->if_xname); +} + /* * Remove an interface from bpf. */ @@ -1832,6 +1945,16 @@ _bpfdetach(struct ifnet *ifp) bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) { if (bp->bif_ifp == ifp) { *pbp = bp->bif_next; + if (bp->bif_si != NULL) { + s = splnet(); + while (bp->bif_mbuf_head != NULL) { + struct mbuf *m = bp->bif_mbuf_head; + bp->bif_mbuf_head = m->m_nextpkt; + m_freem(m); + } + splx(s); + softint_disestablish(bp->bif_si); + } free(bp, M_DEVBUF); goto again; } diff --git a/sys/net/bpf.h b/sys/net/bpf.h index b942e54..c097b02 100644 --- a/sys/net/bpf.h +++ b/sys/net/bpf.h @@ -517,6 +517,9 @@ int bpf_validate_ext(const bpf_ctx_t *, const struct bpf_insn *, int); bpfjit_func_t bpf_jit_generate(bpf_ctx_t *, void *, size_t); void bpf_jit_freecode(bpfjit_func_t); +void bpf_mtap_softint_init(struct ifnet *); +void bpf_mtap_softint(struct ifnet *, struct mbuf *); + #endif int bpf_validate(const struct bpf_insn *, int); diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h index 4cf0ab7..fcbb91c 100644 --- a/sys/net/bpfdesc.h +++ b/sys/net/bpfdesc.h @@ -139,6 +139,9 @@ struct bpf_if { u_int bif_dlt; /* link layer type */ u_int bif_hdrlen; /* length of header (with padding) */ struct ifnet *bif_ifp; /* corresponding interface */ + void *bif_si; + struct mbuf *bif_mbuf_head; + struct mbuf *bif_mbuf_tail; }; #ifdef _KERNEL diff --git a/sys/netisdn/i4b_ipr.c b/sys/netisdn/i4b_ipr.c index 6f04d07..46de02d 100644 --- a/sys/netisdn/i4b_ipr.c +++ b/sys/netisdn/i4b_ipr.c @@ -406,6 +406,7 @@ iripattach(void) bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); #else bpf_attach(&sc->sc_if, DLT_NULL, sizeof(u_int)); + 
bpf_mtap_softint_init(&sc->sc_if); #endif #endif } @@ -1070,7 +1071,7 @@ error: mm.m_len = 4; mm.m_data = (char *)&af; - bpf_mtap(&sc->sc_if, &mm); /* XXX not in softint */ + bpf_mtap_softint(&sc->sc_if, &mm); } #endif /* NBPFILTER > 0 || NBPF > 0 */