commit dfb9cadeeb9bf18afb01ad829cc3575ef8325068
Author: Ryota Ozaki <ozaki-r@iij.ad.jp>
Date:   Thu Jan 14 11:25:25 2016 +0900

    Implement softint-based if_input

diff --git a/sys/dev/pci/if_iwm.c b/sys/dev/pci/if_iwm.c
index 12c60bb..ecde05c 100644
--- a/sys/dev/pci/if_iwm.c
+++ b/sys/dev/pci/if_iwm.c
@@ -6825,7 +6825,7 @@ iwm_attach(device_t parent, device_t self, void *aux)
 	IFQ_SET_READY(&ifp->if_snd);
 	memcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
 
-	if_initialize(ifp);
+	if_initialize(ifp, 0);
 #if 0
 	ieee80211_ifattach(ic);
 #else
diff --git a/sys/dev/pci/if_rtwn.c b/sys/dev/pci/if_rtwn.c
index 99c7d46..fe60c68 100644
--- a/sys/dev/pci/if_rtwn.c
+++ b/sys/dev/pci/if_rtwn.c
@@ -355,7 +355,7 @@ rtwn_attach(device_t parent, device_t self, void *aux)
 	IFQ_SET_READY(&ifp->if_snd);
 	memcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
 
-	if_initialize(ifp);
+	if_initialize(ifp, 0);
 	ieee80211_ifattach(ic);
 	if_register(ifp);
 
diff --git a/sys/dev/pci/if_wm.c b/sys/dev/pci/if_wm.c
index 4124859..6a19ffe 100644
--- a/sys/dev/pci/if_wm.c
+++ b/sys/dev/pci/if_wm.c
@@ -7121,7 +7121,7 @@ wm_rxeof(struct wm_rxqueue *rxq)
 		bpf_mtap(ifp, m);
 
 		/* Pass it on. */
-		(*ifp->if_input)(ifp, m);
+		if_input(ifp, m);
 
 		WM_RX_LOCK(rxq);
 
diff --git a/sys/net/if.c b/sys/net/if.c
index 56e3d98..63aaf24 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -198,6 +198,7 @@ static void if_attachdomain1(struct ifnet *);
 static int ifconf(u_long, void *);
 static int if_clone_create(const char *);
 static int if_clone_destroy(const char *);
+static void if_input_pktq(void *);
 
 #if defined(INET) || defined(INET6)
 static void sysctl_net_pktq_setup(struct sysctllog **, int);
@@ -571,12 +572,12 @@ skip:
  * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl,
  * and be followed by if_register:
  *
- *     if_initialize(ifp);
+ *     if_initialize(ifp, 0);
  *     ether_ifattach(ifp, enaddr);
  *     if_register(ifp);
  */
 void
-if_initialize(ifnet_t *ifp)
+if_initialize(ifnet_t *ifp, int flags)
 {
 	KASSERT(if_indexlim > 0);
 	TAILQ_INIT(&ifp->if_addrlist);
@@ -618,6 +619,11 @@ if_initialize(ifnet_t *ifp)
 	IF_AFDATA_LOCK_INIT(ifp);
 
 	if_getindex(ifp);
+
+	if ((flags & IF_INPUTF_NO_SOFTINT) == 0) {
+		ifp->if_input_pktq = pktq_create(IFQ_MAXLEN, if_input_pktq,
+		    ifp, PKTQ_F_NO_DISTRIBUTION);
+	}
 }
 
 /*
@@ -655,7 +661,7 @@ if_register(ifnet_t *ifp)
 void
 if_attach(ifnet_t *ifp)
 {
-	if_initialize(ifp);
+	if_initialize(ifp, 0);
 	if_register(ifp);
 }
 
@@ -925,6 +931,11 @@ again:
 	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
 	xc_wait(xc);
 
+	if (ifp->if_input_pktq != NULL) {
+		pktq_flush(ifp->if_input_pktq);
+		pktq_destroy(ifp->if_input_pktq);
+	}
+
 	splx(s);
 }
 
@@ -2442,6 +2453,41 @@ if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa)
 }
 
+/*
+ * if_input_pktq: handler registered with pktq_create() in if_initialize().
+ * Drains the interface's input queue, passing each packet to ifp->if_input.
+ */
 static void
+if_input_pktq(void *arg)
+{
+	struct ifnet *ifp = arg;
+	struct mbuf *m;
+
+	while ((m = pktq_dequeue(ifp->if_input_pktq)) != NULL) {
+		ifp->if_input(ifp, m);
+	}
+}
+
+/*
+ * if_input: hand a received packet to the interface's input routine.
+ *
+ * If the interface has an input packet queue, the packet is enqueued
+ * there and ifp->if_input is called later from if_input_pktq();
+ * otherwise ifp->if_input is called directly.
+ */
+void
+if_input(struct ifnet *ifp, struct mbuf *m)
+{
+
+	if (ifp->if_input_pktq) {
+		/* A rejected packet is still ours; free it to avoid a leak. */
+		if (!pktq_enqueue_curcpu(ifp->if_input_pktq, m))
+			m_freem(m);
+	} else {
+		ifp->if_input(ifp, m);
+	}
+}
+
+static void
 sysctl_sndq_setup(struct sysctllog **clog, const char *ifname,
     struct ifaltq *ifq)
 {
diff --git a/sys/net/if.h b/sys/net/if.h
index f8051e4..c4c9046 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -351,12 +351,13 @@ typedef struct ifnet {
 	struct ifnet_lock *if_ioctl_lock;
 #ifdef _KERNEL /* XXX kvm(3) */
 	struct callout *if_slowtimo_ch;
-#endif
 #ifdef GATEWAY
 	struct kmutex	*if_afdata_lock;
 #else
 	struct krwlock	*if_afdata_lock;
 #endif
+	pktqueue_t	*if_input_pktq;
+#endif
 } ifnet_t;
  
 #define	if_mtu		if_data.ifi_mtu
@@ -924,7 +925,8 @@ void if_activate_sadl(struct ifnet *, struct ifaddr *,
     const struct sockaddr_dl *);
 void	if_set_sadl(struct ifnet *, const void *, u_char, bool);
 void	if_alloc_sadl(struct ifnet *);
-void	if_initialize(struct ifnet *);
+void	if_initialize(struct ifnet *, int);
+#define	IF_INPUTF_NO_SOFTINT	1	/* Don't use softint in if_input */
 void	if_register(struct ifnet *);
 void	if_attach(struct ifnet *); /* Deprecated. Use if_initialize and if_register */
 void	if_attachdomain(void);
@@ -946,6 +948,7 @@ int	if_do_dad(struct ifnet *);
 int	if_mcast_op(ifnet_t *, const unsigned long, const struct sockaddr *);
 int	if_flags_set(struct ifnet *, const short);
 int	if_clone_list(int, char *, int *);
+void	if_input(struct ifnet *, struct mbuf *);
 
 void ifa_insert(struct ifnet *, struct ifaddr *);
 void ifa_remove(struct ifnet *, struct ifaddr *);
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index f51edd3..2f3aeff 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -448,7 +448,7 @@ bridge_clone_create(struct if_clone *ifc, int unit)
 	ifp->if_dlt = DLT_EN10MB;
 	ifp->if_hdrlen = ETHER_HDR_LEN;
 
-	sc->sc_fwd_pktq = pktq_create(IFQ_MAXLEN, bridge_forward, sc);
+	sc->sc_fwd_pktq = pktq_create(IFQ_MAXLEN, bridge_forward, sc, 0);
 	KASSERT(sc->sc_fwd_pktq != NULL);
 
 	bridge_sysctl_fwdq_setup(&ifp->if_sysctl_log, sc);
diff --git a/sys/net/if_tap.c b/sys/net/if_tap.c
index c7a6aa9..f0168e1 100644
--- a/sys/net/if_tap.c
+++ b/sys/net/if_tap.c
@@ -342,7 +342,7 @@ tap_attach(device_t parent, device_t self, void *aux)
 	sc->sc_ec.ec_capabilities = ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
 
 	/* Those steps are mandatory for an Ethernet driver. */
-	if_initialize(ifp);
+	if_initialize(ifp, IF_INPUTF_NO_SOFTINT);
 	ether_ifattach(ifp, enaddr);
 	if_register(ifp);
 
@@ -1061,7 +1061,7 @@ tap_dev_write(int unit, struct uio *uio, int flags)
 
 	bpf_mtap(ifp, m);
 	s = splnet();
-	(*ifp->if_input)(ifp, m);
+	if_input(ifp, m);
 	splx(s);
 
 	return (0);
diff --git a/sys/net/pktqueue.c b/sys/net/pktqueue.c
index cf3f96d..9f9d784 100644
--- a/sys/net/pktqueue.c
+++ b/sys/net/pktqueue.c
@@ -55,9 +55,11 @@ __KERNEL_RCSID(0, "$NetBSD: pktqueue.c,v 1.8 2014/07/04 01:50:22 ozaki-r Exp $")
  * WARNING: update this if struct pktqueue changes.
  */
 #define	PKTQ_CLPAD	\
-    MAX(COHERENCY_UNIT, COHERENCY_UNIT - sizeof(kmutex_t) - sizeof(u_int))
+    MAX(COHERENCY_UNIT, COHERENCY_UNIT - sizeof(int) - sizeof(kmutex_t) \
+        - sizeof(u_int))
 
 struct pktqueue {
+	int		pq_flags;
 	/*
 	 * The lock used for a barrier mechanism.  The barrier counter,
 	 * as well as the drop counter, are managed atomically though.
@@ -96,14 +98,17 @@ typedef struct {
     roundup2(offsetof(pktqueue_t, pq_queue[ncpu]), coherency_unit)
 
 pktqueue_t *
-pktq_create(size_t maxlen, void (*intrh)(void *), void *sc)
+pktq_create(size_t maxlen, void (*intrh)(void *), void *sc, int flags)
 {
-	const u_int sflags = SOFTINT_NET | SOFTINT_MPSAFE | SOFTINT_RCPU;
+	u_int sflags = SOFTINT_NET | SOFTINT_MPSAFE;
 	const size_t len = PKTQUEUE_STRUCT_LEN(ncpu);
 	pktqueue_t *pq;
 	percpu_t *pc;
 	void *sih;
 
+	if ((flags & PKTQ_F_NO_DISTRIBUTION) == 0)
+		sflags |= SOFTINT_RCPU;
+
 	if ((pc = percpu_alloc(sizeof(pktq_counters_t))) == NULL) {
 		return NULL;
 	}
@@ -120,6 +125,7 @@ pktq_create(size_t maxlen, void (*intrh)(void *), void *sc)
 	pq->pq_maxlen = maxlen;
 	pq->pq_counters = pc;
 	pq->pq_sih = sih;
+	pq->pq_flags = flags;
 
 	return pq;
 }
@@ -220,7 +226,10 @@ pktq_enqueue(pktqueue_t *pq, struct mbuf *m, const u_int hash __unused)
 		pktq_inc_count(pq, PQCNT_DROP);
 		return false;
 	}
-	softint_schedule_cpu(pq->pq_sih, cpu_lookup(cpuid));
+	if (pq->pq_flags & PKTQ_F_NO_DISTRIBUTION)
+		softint_schedule(pq->pq_sih);
+	else
+		softint_schedule_cpu(pq->pq_sih, cpu_lookup(cpuid));
 	pktq_inc_count(pq, PQCNT_ENQUEUE);
 	return true;
 }
diff --git a/sys/net/pktqueue.h b/sys/net/pktqueue.h
index c50d8c2..f37309e 100644
--- a/sys/net/pktqueue.h
+++ b/sys/net/pktqueue.h
@@ -37,6 +37,7 @@
 #endif
 
 #include <sys/sysctl.h>
+#include <sys/cpu.h>
 
 struct mbuf;
 
@@ -44,7 +45,8 @@ typedef struct pktqueue pktqueue_t;
 
 typedef enum { PKTQ_MAXLEN, PKTQ_NITEMS, PKTQ_DROPS } pktq_count_t;
 
-pktqueue_t *	pktq_create(size_t, void (*)(void *), void *);
+pktqueue_t *	pktq_create(size_t, void (*)(void *), void *, int);
+#define		PKTQ_F_NO_DISTRIBUTION	1
 void		pktq_destroy(pktqueue_t *);
 
 bool		pktq_enqueue(pktqueue_t *, struct mbuf *, const u_int);
@@ -53,6 +55,13 @@ void		pktq_barrier(pktqueue_t *);
 void		pktq_flush(pktqueue_t *);
 int		pktq_set_maxlen(pktqueue_t *, size_t);
 
+static inline bool
+pktq_enqueue_curcpu(pktqueue_t *pq, struct mbuf *m)
+{
+
+	return pktq_enqueue(pq, m, curcpu()->ci_index);
+}
+
 uint32_t	pktq_rps_hash(const struct mbuf *);
 uint64_t	pktq_get_count(pktqueue_t *, pktq_count_t);
 
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index dda6ef2..c77d00e 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -308,7 +308,7 @@ ip_init(void)
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	KASSERT(pr != NULL);
 
-	ip_pktq = pktq_create(IFQ_MAXLEN, ipintr, NULL);
+	ip_pktq = pktq_create(IFQ_MAXLEN, ipintr, NULL, 0);
 	KASSERT(ip_pktq != NULL);
 
 	for (u_int i = 0; i < IPPROTO_MAX; i++) {
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index a427e50..f55d034 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -177,7 +177,7 @@ ip6_init(void)
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
 			ip6_protox[pr->pr_protocol] = pr - inet6sw;
 
-	ip6_pktq = pktq_create(IFQ_MAXLEN, ip6intr, NULL);
+	ip6_pktq = pktq_create(IFQ_MAXLEN, ip6intr, NULL, 0);
 	KASSERT(ip6_pktq != NULL);
 
 	scope6_init();