diff --git a/sys/net/files.net b/sys/net/files.net index 44440f4..67653e4 100644 --- a/sys/net/files.net +++ b/sys/net/files.net @@ -62,6 +62,7 @@ file netinet/ip_carp.c carp & (inet | inet6) needs-flag file netinet/ip_ecn.c ipsec | gif | stf file netinet/ip_encap.c inet | inet6 file netinet/ip_etherip.c etherip & inet +file netinet/wqinput.c inet | inet6 file netinet6/ip6_etherip.c etherip & inet6 file netinet6/in6_gif.c gif & inet6 diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 887d849..581b83f 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -70,6 +70,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.83 2017/01/16 15:44:47 christos Exp $" #include #include #include +#include #if NFDDI > 0 #include @@ -234,6 +235,14 @@ static void carp_ether_purgemulti(struct carp_softc *); static void sysctl_net_inet_carp_setup(struct sysctllog **); +/* workqueue-based pr_input; _carp*_proto_input() run from the workqueue */ +static struct wqinput *carp_wqinput; +static void _carp_proto_input(struct mbuf *, int, int); +#ifdef INET6 +static struct wqinput *carp6_wqinput; +static void _carp6_proto_input(struct mbuf *, int, int); +#endif + struct if_clone carp_cloner = IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); @@ -468,19 +477,15 @@ carp_setroute(struct carp_softc *sc, int cmd) * we have rearranged checks order compared to the rfc, * but it seems more efficient this way or not possible otherwise. */ -void -carp_proto_input(struct mbuf *m, ...) +static void +_carp_proto_input(struct mbuf *m, int hlen, int proto) { struct ip *ip = mtod(m, struct ip *); struct carp_softc *sc = NULL; struct carp_header *ch; int iplen, len; - va_list ap; struct ifnet *rcvif; - va_start(ap, m); - va_end(ap); - CARP_STATINC(CARP_STAT_IPACKETS); MCLAIM(m, &carp_proto_mowner_rx); @@ -542,11 +547,17 @@ carp_proto_input(struct mbuf *m, ...) carp_proto_input_c(m, ch, AF_INET); } +void +carp_proto_input(struct mbuf *m, ...) 
+{ + + wqinput_input(carp_wqinput, m, 0, 0); +} + #ifdef INET6 -int -carp6_proto_input(struct mbuf **mp, int *offp, int proto) +static void +_carp6_proto_input(struct mbuf *m, int off, int proto) { - struct mbuf *m = *mp; struct carp_softc *sc = NULL; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct carp_header *ch; @@ -558,7 +569,7 @@ carp6_proto_input(struct mbuf **mp, int *offp, int proto) if (!carp_opts[CARPCTL_ALLOW]) { m_freem(m); - return (IPPROTO_DONE); + return; } rcvif = m_get_rcvif_NOMPSAFE(m); @@ -569,7 +580,7 @@ carp6_proto_input(struct mbuf **mp, int *offp, int proto) CARP_LOG(sc, ("packet received on non-carp interface: %s", rcvif->if_xname)); m_freem(m); - return (IPPROTO_DONE); + return; } /* verify that the IP TTL is 255 */ @@ -578,31 +589,40 @@ carp6_proto_input(struct mbuf **mp, int *offp, int proto) CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, CARP_DFLTTL, rcvif->if_xname)); m_freem(m); - return (IPPROTO_DONE); + return; } /* verify that we have a complete carp packet */ len = m->m_len; - IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); + IP6_EXTHDR_GET(ch, struct carp_header *, m, off, sizeof(*ch)); if (ch == NULL) { CARP_STATINC(CARP_STAT_BADLEN); CARP_LOG(sc, ("packet size %u too small", len)); - return (IPPROTO_DONE); + return; } /* verify the CARP checksum */ - m->m_data += *offp; + m->m_data += off; if (carp_cksum(m, sizeof(*ch))) { CARP_STATINC(CARP_STAT_BADSUM); CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname)); m_freem(m); - return (IPPROTO_DONE); + return; } - m->m_data -= *offp; + m->m_data -= off; carp_proto_input_c(m, ch, AF_INET6); - return (IPPROTO_DONE); + return; +} + +int +carp6_proto_input(struct mbuf **mp, int *offp, int proto) +{ + + wqinput_input(carp6_wqinput, *mp, *offp, proto); + + return IPPROTO_DONE; } #endif /* INET6 */ @@ -2342,6 +2362,11 @@ carp_init(void) MOWNER_ATTACH(&carp_proto6_mowner_rx); MOWNER_ATTACH(&carp_proto6_mowner_tx); #endif + + carp_wqinput = 
wqinput_create("carp_wqinput", _carp_proto_input); +#ifdef INET6 + carp6_wqinput = wqinput_create("carp6_wqinput", _carp6_proto_input); +#endif } static void diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index f7d0cb0..f4801bc 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -125,6 +125,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.155 2017/01/24 07:09:24 ozaki-r Exp $" #include #include #include +#include #ifdef IPSEC #include @@ -175,6 +176,10 @@ static void icmp_redirect_timeout(struct rtentry *, struct rttimer *); static void sysctl_netinet_icmp_setup(struct sysctllog **); +/* workqueue-based pr_input; _icmp_input() runs from the workqueue */ +static struct wqinput *icmp_wqinput; +static void _icmp_input(struct mbuf *, int, int); + void icmp_init(void) { @@ -191,6 +196,7 @@ icmp_init(void) } icmpstat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP_NSTATS); + icmp_wqinput = wqinput_create("icmp_wqinput", _icmp_input); } /* @@ -384,10 +390,9 @@ struct sockaddr_in icmpmask = { /* * Process a received ICMP message. */ -void -icmp_input(struct mbuf *m, ...) +static void +_icmp_input(struct mbuf *m, int hlen, int proto) { - int proto; struct icmp *icp; struct ip *ip = mtod(m, struct ip *); int icmplen; @@ -395,15 +400,8 @@ icmp_input(struct mbuf *m, ...) struct in_ifaddr *ia; void *(*ctlfunc)(int, const struct sockaddr *, void *); int code; - int hlen; - va_list ap; struct rtentry *rt; - va_start(ap, m); - hlen = va_arg(ap, int); - proto = va_arg(ap, int); - va_end(ap); - /* * Locate icmp structure in mbuf, and check * that not corrupted and of at least minimum length. @@ -685,6 +683,20 @@ freeit: return; } +void +icmp_input(struct mbuf *m, ...) 
+{
+	int hlen, proto;
+	va_list ap;
+
+	va_start(ap, m);
+	hlen = va_arg(ap, int);
+	proto = va_arg(ap, int);
+	va_end(ap);
+
+	wqinput_input(icmp_wqinput, m, hlen, proto);
+}
+
 /*
  * Reflect the ip packet back to the source
  */
diff --git a/sys/netinet/wqinput.c b/sys/netinet/wqinput.c
new file mode 100644
index 0000000..056dbb6
--- /dev/null
+++ b/sys/netinet/wqinput.c
@@ -0,0 +1,214 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN
+
+/* One queued packet and the arguments to pass to the input routine. */
+struct wqinput_work {
+	struct mbuf *ww_mbuf;
+	int ww_off;
+	int ww_proto;
+	struct wqinput_work *ww_next;
+};
+
+/* Per-CPU FIFO of pending packets and the work item that drains it. */
+struct wqinput_worklist {
+	/*
+	 * XXX: TAILQ cannot be used because TAILQ_INIT records the address
+	 * of percpu data while percpu(9) may move percpu data during bootup.
+	 */
+	struct wqinput_work *wwl_head;
+	struct wqinput_work *wwl_tail;
+	unsigned int wwl_len;
+	unsigned long wwl_dropped;
+	struct work wwl_work;
+	bool wwl_wq_is_active;
+};
+
+/* Workqueue-based input path wrapping one input routine. */
+struct wqinput {
+	struct workqueue *wqi_wq;
+	struct pool wqi_work_pool;
+	struct percpu *wqi_worklists; /* struct wqinput_worklist */
+	void (*wqi_input)(struct mbuf *, int, int);
+};
+
+static void wqinput_work(struct work *, void *);
+
+/*
+ * Create a wqinput whose workqueue, named name, passes queued packets to
+ * func.  Panics if the workqueue cannot be created.
+ */
+struct wqinput *
+wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
+{
+	struct wqinput *wqi;
+	int error;
+
+	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
+
+	error = workqueue_create(&wqi->wqi_wq, name, wqinput_work, wqi,
+	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
+	if (error != 0)
+		panic("%s: workqueue_create failed (%d)\n", __func__, error);
+	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
+	    name, NULL, IPL_SOFTNET);
+	wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist));
+	wqi->wqi_input = func;
+
+	return wqi;
+}
+
+/* Detach and return the oldest entry of wwl, or NULL if wwl is empty. */
+static struct wqinput_work *
+wqinput_work_get(struct wqinput_worklist *wwl)
+{
+	struct wqinput_work *work;
+
+	/* Must be called at IPL_SOFTNET */
+
+	work = wwl->wwl_head;
+	if (work != NULL) {
+		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%u", wwl->wwl_len);
+		wwl->wwl_len--;
+		wwl->wwl_head = work->ww_next;
+		work->ww_next = NULL;
+
+		if (wwl->wwl_head == NULL)
+			wwl->wwl_tail = NULL;
+	} else {
+		KASSERT(wwl->wwl_len == 0);
+	}
+
+	return work;
+}
+
+/*
+ * Workqueue handler: drain the current CPU's worklist, calling the input
+ * routine for each queued packet with softnet_lock held.
+ */
+static void
+wqinput_work(struct work *wk, void *arg)
+{
+	struct wqinput *wqi = arg;
+	struct wqinput_work *work;
+	struct wqinput_worklist *wwl;
+	int s;
+
+	/* Users expect to run at IPL_SOFTNET */
+	s = splsoftnet();
+	/* This also prevents LWP migrations between CPUs */
+	wwl = percpu_getref(wqi->wqi_worklists);
+
+	/* We can allow enqueuing another work at this point */
+	wwl->wwl_wq_is_active = false;
+
+	while ((work = wqinput_work_get(wwl)) != NULL) {
+		mutex_enter(softnet_lock);
+		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
+		mutex_exit(softnet_lock);
+
+		pool_put(&wqi->wqi_work_pool, work);
+	}
+
+	percpu_putref(wqi->wqi_worklists);
+	splx(s);
+}
+
+/* Append work to the tail of wwl. */
+static void
+wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
+{
+
+	if (wwl->wwl_tail != NULL) {
+		wwl->wwl_tail->ww_next = work;
+	} else {
+		wwl->wwl_head = work;
+	}
+	wwl->wwl_tail = work;
+	wwl->wwl_len++;
+}
+
+/*
+ * Queue m, with the off and proto to pass along, on the current CPU's
+ * worklist and make sure the workqueue is scheduled to process it.  The
+ * caller gives up m in every case: it is freed and counted in wwl_dropped
+ * when the list is full or no work entry can be allocated.
+ */
+void
+wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
+{
+	struct wqinput_work *work;
+	struct wqinput_worklist *wwl;
+
+	wwl = percpu_getref(wqi->wqi_worklists);
+
+	/* Prevent too much work and mbuf from being queued */
+	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
+		wwl->wwl_dropped++;
+		m_freem(m);
+		goto out;
+	}
+
+	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
+	if (work == NULL) {
+		/* PR_NOWAIT may fail; drop the packet instead of crashing */
+		wwl->wwl_dropped++;
+		m_freem(m);
+		goto out;
+	}
+	work->ww_mbuf = m;
+	work->ww_off = off;
+	work->ww_proto = proto;
+	work->ww_next = NULL;
+
+	wqinput_work_put(wwl, work);
+
+	/* Avoid enqueuing another work when one is already enqueued */
+	if (wwl->wwl_wq_is_active)
+		goto out;
+	wwl->wwl_wq_is_active = true;
+
+	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
+out:
+	percpu_putref(wqi->wqi_worklists);
+}
diff --git a/sys/netinet/wqinput.h b/sys/netinet/wqinput.h
new file mode 100644
index 0000000..004c967
--- /dev/null
+++ b/sys/netinet/wqinput.h
@@ -0,0 +1,42 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _NETINET_WQINPUT_H_ +#define _NETINET_WQINPUT_H_ + +#if !defined(_KERNEL) +#error "not supposed to be exposed to userland." 
+#endif + +#include + +struct wqinput; +struct wqinput *wqinput_create(const char *, void(*)(struct mbuf *, int, int)); +void wqinput_input(struct wqinput *, struct mbuf *, int, int); + +#endif /* _NETINET_WQINPUT_H_ */ diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index fb5b442..e00b8a9 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -90,6 +90,7 @@ __KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.206 2017/01/16 15:44:47 christos Exp $") #include #include #include +#include #include #include #include @@ -169,6 +170,9 @@ static void icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *); static void icmp6_redirect_timeout(struct rtentry *, struct rttimer *); static void sysctl_net_inet6_icmp6_setup(struct sysctllog **); +/* workqueue-based pr_input; _icmp6_input() runs from the workqueue */ +static struct wqinput *icmp6_wqinput; +static void _icmp6_input(struct mbuf *m, int off, int proto); void icmp6_init(void) @@ -180,6 +184,8 @@ icmp6_init(void) icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout); icmp6stat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP6_NSTATS); + + icmp6_wqinput = wqinput_create("icmp6_wqinput", _icmp6_input); } static void @@ -444,13 +450,12 @@ icmp6_error(struct mbuf *m, int type, int code, int param) /* * Process a received ICMP6 message. */ -int -icmp6_input(struct mbuf **mp, int *offp, int proto) +static void +_icmp6_input(struct mbuf *m, int off, int proto) { - struct mbuf *m = *mp, *n; + struct mbuf *n; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; - int off = *offp; int icmp6len = m->m_pkthdr.len - off; int code, sum, noff; struct ifnet *rcvif; @@ -879,7 +884,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (icmp6_notify_error(m, off, icmp6len, code)) { /* In this case, m should've been freed. 
*/ m_put_rcvif_psref(rcvif, &psref); - return (IPPROTO_DONE); + return; } break; @@ -896,11 +901,20 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, off); - return IPPROTO_DONE; + return; freeit: m_put_rcvif_psref(rcvif, &psref); m_freem(m); + return; +} + +int +icmp6_input(struct mbuf **mp, int *offp, int proto) +{ + + wqinput_input(icmp6_wqinput, *mp, *offp, proto); + return IPPROTO_DONE; } diff --git a/sys/rump/librump/rumpnet/Makefile.rumpnet b/sys/rump/librump/rumpnet/Makefile.rumpnet index 8a792d7..e114bdf5 100644 --- a/sys/rump/librump/rumpnet/Makefile.rumpnet +++ b/sys/rump/librump/rumpnet/Makefile.rumpnet @@ -38,6 +38,9 @@ SRCS+= # bpf stubs, required for all kernels SRCS+= bpf_stub.c +# workqueue-based pr_input (required by inet and inet6) +SRCS+= wqinput.c + CPPFLAGS+= -I${RUMPTOP}/librump/rumpkern .include "${RUMPTOP}/Makefile.rump"