diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 34d99f3..c99a613 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -93,6 +93,7 @@ static void gifintr(void *); * gif global variable definitions */ LIST_HEAD(, gif_softc) gif_softc_list; /* XXX should be static */ +krwlock_t *gif_softc_list_lock; static int gif_clone_create(struct if_clone *, int); static int gif_clone_destroy(struct ifnet *); @@ -123,6 +124,11 @@ gifattach(int count) { LIST_INIT(&gif_softc_list); +#ifdef GIF_MPSAFE + gif_softc_list_lock = rw_obj_alloc(); + KASSERT(gif_softc_list_lock != NULL); +#endif + if_clone_attach(&gif_cloner); } @@ -139,7 +145,9 @@ gif_clone_create(struct if_clone *ifc, int unit) gifattach0(sc); + GIF_LIST_WRITE_LOCK(); LIST_INSERT_HEAD(&gif_softc_list, sc, gif_list); + GIF_LIST_WRITE_UNLOCK(); return (0); } @@ -152,6 +160,10 @@ gifattach0(struct gif_softc *sc) cv_init(&sc->gif_si_cv, "if_gif_cv"); sc->gif_si_refs = 0; sc->encap_cookie4 = sc->encap_cookie6 = NULL; +#ifdef GIF_MPSAFE + sc->gif_lock = rw_obj_alloc(); + KASSERT(sc->gif_lock != NULL); +#endif sc->gif_if.if_addrlen = 0; sc->gif_if.if_mtu = GIF_MTU; @@ -172,7 +184,9 @@ gif_clone_destroy(struct ifnet *ifp) { struct gif_softc *sc = (void *) ifp; + GIF_LIST_WRITE_LOCK(); LIST_REMOVE(sc, gif_list); + GIF_LIST_WRITE_UNLOCK(); gif_delete_tunnel(&sc->gif_if); bpf_detach(ifp); @@ -181,6 +195,9 @@ gif_clone_destroy(struct ifnet *ifp) cv_destroy(&sc->gif_si_cv); mutex_obj_free(sc->gif_si_lock); +#ifdef GIF_MPSAFE + rw_obj_free(sc->gif_lock); +#endif kmem_free(sc, sizeof(struct gif_softc)); return (0); @@ -192,17 +209,20 @@ gif_encapcheck(struct mbuf *m, int off, int proto, void *arg) { struct ip ip; struct gif_softc *sc; + int ret = 0; sc = arg; if (sc == NULL) return 0; + GIF_READ_LOCK(sc); + if ((sc->gif_if.if_flags & IFF_UP) == 0) - return 0; + goto out; /* no physical address */ if (!sc->gif_psrc || !sc->gif_pdst) - return 0; + goto out; switch (proto) { #ifdef INET @@ -214,13 +234,13 @@ gif_encapcheck(struct mbuf *m, int off, int proto, void *arg) break; #endif default: - return 0; + goto out; } /* Bail on short packets */ KASSERT(m->m_flags & M_PKTHDR); if (m->m_pkthdr.len < sizeof(ip)) - return 0; + goto out; m_copydata(m, 0, sizeof(ip), &ip); @@ -229,21 +249,26 @@ gif_encapcheck(struct mbuf *m, int off, int proto, void *arg) case 4: if (sc->gif_psrc->sa_family != AF_INET || sc->gif_pdst->sa_family != AF_INET) - return 0; - return gif_encapcheck4(m, off, proto, arg); + goto out; + ret = gif_encapcheck4(m, off, proto, arg); + goto out; #endif #ifdef INET6 case 6: if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) - return 0; + goto out; if (sc->gif_psrc->sa_family != AF_INET6 || sc->gif_pdst->sa_family != AF_INET6) - return 0; - return gif_encapcheck6(m, off, proto, arg); + goto out; + ret = gif_encapcheck6(m, off, proto, arg); + goto out; #endif default: - return 0; + goto out; } +out: + GIF_READ_UNLOCK(sc); + return ret; } #endif @@ -297,9 +322,12 @@ gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, if ((error = gif_check_nesting(ifp, m)) != 0) { m_free(m); - goto end; + ifp->if_oerrors++; + return error; } + GIF_READ_LOCK(sc); + m->m_flags &= ~(M_BCAST|M_MCAST); if (!(ifp->if_flags & IFF_UP) || sc->gif_psrc == NULL || sc->gif_pdst == NULL || @@ -337,6 +365,8 @@ gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, error = 0; end: + GIF_READ_UNLOCK(sc); + if (error) ifp->if_oerrors++; return error; @@ -348,12 +378,16 @@ gifintr(void *arg) struct gif_softc *sc; struct ifnet *ifp; struct mbuf *m; - int family; + int family, outer_family; int len; int s; int error; + KASSERT(arg != NULL); + sc = arg; + + GIF_READ_LOCK(sc); ifp = &sc->gif_if; atomic_inc_uint(&sc->gif_si_refs); @@ -364,6 +398,7 @@ gifintr(void *arg) * softint_schedule(). */ if (sc->gif_pdst == NULL || sc->gif_psrc == NULL) { + GIF_READ_UNLOCK(sc); IFQ_PURGE(&ifp->if_snd); if (atomic_dec_uint_nv(&sc->gif_si_refs) == 0) { @@ -374,10 +409,13 @@ gifintr(void *arg) return; } + outer_family = sc->gif_psrc->sa_family; + GIF_READ_UNLOCK(sc); + /* output processing */ while (1) { s = splnet(); - IFQ_DEQUEUE(&sc->gif_if.if_snd, m); + IFQ_DEQUEUE(&ifp->if_snd, m); splx(s); if (m == NULL) break; @@ -397,7 +435,7 @@ gifintr(void *arg) len = m->m_pkthdr.len; /* dispatch to output logic based on outer AF */ - switch (sc->gif_psrc->sa_family) { + switch (outer_family) { #ifdef INET case AF_INET: mutex_enter(softnet_lock); @@ -629,7 +667,9 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, void *data) #ifdef INET6 case SIOCGIFPSRCADDR_IN6: #endif /* INET6 */ + GIF_READ_LOCK(sc); if (sc->gif_psrc == NULL) { + GIF_READ_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } @@ -649,19 +689,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, void *data) break; #endif /* INET6 */ default: + GIF_READ_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } - if (src->sa_len > size) + if (src->sa_len > size) { + GIF_READ_UNLOCK(sc); return EINVAL; + } memcpy(dst, src, src->sa_len); + GIF_READ_UNLOCK(sc); break; case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPDSTADDR_IN6: #endif /* INET6 */ + GIF_READ_LOCK(sc); if (sc->gif_pdst == NULL) { + GIF_READ_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } @@ -681,16 +727,22 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, void *data) break; #endif /* INET6 */ default: + GIF_READ_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } - if (src->sa_len > size) + if (src->sa_len > size) { + GIF_READ_UNLOCK(sc); return EINVAL; + } memcpy(dst, src, src->sa_len); + GIF_READ_UNLOCK(sc); break; case SIOCGLIFPHYADDR: + GIF_READ_LOCK(sc); if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) { + GIF_READ_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } @@ -700,8 +752,10 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, void *data) dst = (struct sockaddr *) &(((struct if_laddrreq *)data)->addr); size = sizeof(((struct if_laddrreq *)data)->addr); - if (src->sa_len > size) + if (src->sa_len > size) { + GIF_READ_UNLOCK(sc); return EINVAL; + } memcpy(dst, src, src->sa_len); /* copy dst */ @@ -709,9 +763,12 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, void *data) dst = (struct sockaddr *) &(((struct if_laddrreq *)data)->dstaddr); size = sizeof(((struct if_laddrreq *)data)->dstaddr); - if (src->sa_len > size) + if (src->sa_len > size) { + GIF_READ_UNLOCK(sc); return EINVAL; + } memcpy(dst, src, src->sa_len); + GIF_READ_UNLOCK(sc); break; default: @@ -783,37 +840,49 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) struct sockaddr *osrc, *odst; struct sockaddr *nsrc, *ndst; void *osi; - int s; int error; +#ifndef GIF_MPSAFE + int s; s = splsoftnet(); +#endif + GIF_LIST_READ_LOCK(); LIST_FOREACH(sc2, &gif_softc_list, gif_list) { if (sc2 == sc) continue; - if (!sc2->gif_pdst || !sc2->gif_psrc) + + GIF_READ_LOCK(sc2); + if (!sc2->gif_pdst || !sc2->gif_psrc) { + GIF_READ_UNLOCK(sc2); continue; + } /* can't configure same pair of address onto two gifs */ if (sockaddr_cmp(sc2->gif_pdst, dst) == 0 && sockaddr_cmp(sc2->gif_psrc, src) == 0) { /* continue to use the old configureation. */ - splx(s); - return EADDRNOTAVAIL; + GIF_READ_UNLOCK(sc2); + GIF_LIST_READ_UNLOCK(); + error = EADDRNOTAVAIL; + goto out; } - + GIF_READ_UNLOCK(sc2); /* XXX both end must be valid? (I mean, not 0.0.0.0) */ } + GIF_LIST_READ_UNLOCK(); if ((nsrc = sockaddr_dup(src, M_WAITOK)) == NULL) { - splx(s); - return ENOMEM; + error = ENOMEM; + goto out; } if ((ndst = sockaddr_dup(dst, M_WAITOK)) == NULL) { sockaddr_free(nsrc); - splx(s); - return ENOMEM; + error = ENOMEM; + goto out; } + GIF_WRITE_LOCK(sc); + /* Firstly, clear old configurations. */ if (sc->gif_si) { osrc = sc->gif_psrc; @@ -828,9 +897,11 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) * any more. However, there are below 2 fears of other CPUs. * (a) gif_output() has done softint_schedule(),and softint * (gifintr()) is waiting for execution - * (b) gifintr() is already running + * (b) gifintr() is already running, and waiting for + * GIF_READ_LOCK(sc) * see also gifintr() */ + GIF_WRITE_UNLOCK(sc); /* * To avoid the above fears, wait for gifintr() completion of @@ -844,6 +915,7 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) mutex_exit(sc->gif_si_lock); softint_disestablish(osi); + GIF_WRITE_LOCK(sc); sc->gif_psrc = osrc; sc->gif_pdst = odst; osrc = NULL; @@ -874,7 +946,11 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) continue; } - sc->gif_si = softint_establish(SOFTINT_NET, gifintr, sc); + sc->gif_si = softint_establish(SOFTINT_NET +#ifdef GIF_MPSAFE + | SOFTINT_MPSAFE +#endif + ,gifintr, sc); if (sc->gif_si == NULL) { (void)gif_encap_detach(sc); @@ -906,7 +982,11 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) else ifp->if_flags &= ~IFF_RUNNING; + GIF_WRITE_UNLOCK(sc); +out: +#ifndef GIF_MPSAFE splx(s); +#endif return error; } @@ -916,9 +996,13 @@ gif_delete_tunnel(struct ifnet *ifp) struct gif_softc *sc = ifp->if_softc; struct sockaddr *osrc, *odst; void *osi; +#ifndef GIF_MPSAFE int s; s = splsoftnet(); +#endif + + GIF_WRITE_LOCK(sc); if (sc->gif_si) { osrc = sc->gif_psrc; @@ -928,6 +1012,7 @@ gif_delete_tunnel(struct ifnet *ifp) sc->gif_psrc = NULL; sc->gif_pdst = NULL; sc->gif_si = NULL; + GIF_WRITE_UNLOCK(sc); mutex_enter(sc->gif_si_lock); while (sc->gif_si_refs > 0) { @@ -937,6 +1022,7 @@ gif_delete_tunnel(struct ifnet *ifp) mutex_exit(sc->gif_si_lock); softint_disestablish(osi); + GIF_WRITE_LOCK(sc); sc->gif_psrc = osrc; sc->gif_pdst = odst; } @@ -960,5 +1046,9 @@ gif_delete_tunnel(struct ifnet *ifp) ifp->if_flags |= IFF_RUNNING; else ifp->if_flags &= ~IFF_RUNNING; + + GIF_WRITE_UNLOCK(sc); +#ifndef GIF_MPSAFE splx(s); +#endif } diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h index 481fbbd..368b43b 100644 --- a/sys/net/if_gif.h +++ b/sys/net/if_gif.h @@ -40,14 +40,50 @@ #include #include #include +#include #ifdef _KERNEL_OPT #include "opt_inet.h" +#include "opt_net_mpsafe.h" #endif #include /* xxx sigh, why route have struct route instead of pointer? */ +#ifdef NET_MPSAFE +#define GIF_MPSAFE 1 +#endif + +#ifdef GIF_MPSAFE +#define GIF_READ_LOCK(sc) rw_enter((sc)->gif_lock, RW_READER) +#define GIF_READ_UNLOCK(sc) rw_exit((sc)->gif_lock) +#define GIF_READ_LOCKED(sc) rw_read_held((sc)->gif_lock) +#define GIF_WRITE_LOCK(sc) rw_enter((sc)->gif_lock, RW_WRITER) +#define GIF_WRITE_UNLOCK(sc) rw_exit((sc)->gif_lock) +#define GIF_WRITE_LOCKED(sc) rw_write_held((sc)->gif_lock) + +#define GIF_LIST_READ_LOCK() rw_enter(gif_softc_list_lock, RW_READER) +#define GIF_LIST_READ_UNLOCK() rw_exit(gif_softc_list_lock) +#define GIF_LIST_READ_LOCKED() rw_read_held(gif_softc_list_lock) +#define GIF_LIST_WRITE_LOCK() rw_enter(gif_softc_list_lock, RW_WRITER) +#define GIF_LIST_WRITE_UNLOCK() rw_exit(gif_softc_list_lock) +#define GIF_LIST_WRITE_LOCKED() rw_write_held(gif_softc_list_lock) +#else /* !GIF_MPSAFE */ +#define GIF_READ_LOCK(sc) +#define GIF_READ_UNLOCK(sc) +#define GIF_READ_LOCKED(sc) (true) +#define GIF_WRITE_LOCK(sc) +#define GIF_WRITE_UNLOCK(sc) +#define GIF_WRITE_LOCKED(sc) (true) + +#define GIF_LIST_READ_LOCK() +#define GIF_LIST_READ_UNLOCK() +#define GIF_LIST_READ_LOCKED() (true) +#define GIF_LIST_WRITE_LOCK() +#define GIF_LIST_WRITE_UNLOCK() +#define GIF_LIST_WRITE_LOCKED() (true) +#endif /* !GIF_MPSAFE */ + struct encaptab; struct gif_softc { @@ -62,6 +98,7 @@ struct gif_softc { const struct encaptab *encap_cookie6; LIST_ENTRY(gif_softc) gif_list; /* list of all gifs */ void *gif_si; /* softintr handle */ + krwlock_t *gif_lock; /* lock for softc */ struct si_sync { /* can access without gif_lock */ unsigned int si_refs; /* reference count for gif_si */ @@ -95,6 +132,16 @@ int gif_encapcheck(struct mbuf *, int, int, void *); /* * Locking notes: + * - Updates of gif_softc_list are serialized by gif_softc_list_lock (a rwlock) + * - Below members of gif_softc are protected by gif_lock (a rwlock) + * - gif_psrc + * - gif_pdst + * - encap_cookie4 + * - encap_cookie6 + * - gif_si * - All members of struct si_sync are protected by si_lock (an adaptive mutex) + * + * Lock order: + * softnet_lock -> gif_lock -> gif_si_lock -> encap_lock(@ip_encap.c) */ #endif /* !_NET_IF_GIF_H_ */ diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 647809b..4f2b1a3 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -96,8 +96,8 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) { struct rtentry *rt; struct gif_softc *sc = ifp->if_softc; - struct sockaddr_in *sin_src = satosin(sc->gif_psrc); - struct sockaddr_in *sin_dst = satosin(sc->gif_pdst); + struct sockaddr_in *sin_src; + struct sockaddr_in *sin_dst; struct ip iphdr; /* capsule IP header, host byte ordered */ int proto, error; u_int8_t tos; @@ -106,11 +106,17 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) struct sockaddr_in dst4; } u; + GIF_READ_LOCK(sc); + + sin_src = satosin(sc->gif_psrc); + sin_dst = satosin(sc->gif_pdst); + if (sin_src == NULL || sin_dst == NULL || sin_src->sin_family != AF_INET || sin_dst->sin_family != AF_INET) { m_freem(m); - return EAFNOSUPPORT; + error = EAFNOSUPPORT; + goto out; } switch (family) { @@ -122,8 +128,10 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) proto = IPPROTO_IPV4; if (m->m_len < sizeof(*ip)) { m = m_pullup(m, sizeof(*ip)); - if (m == NULL) - return ENOBUFS; + if (m == NULL) { + error = ENOBUFS; + goto out; + } } ip = mtod(m, const struct ip *); tos = ip->ip_tos; @@ -137,8 +145,10 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) proto = IPPROTO_IPV6; if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); - if (m == NULL) - return ENOBUFS; + if (m == NULL) { + error = ENOBUFS; + goto out; + } } ip6 = mtod(m, const struct ip6_hdr *); tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; @@ -151,7 +161,8 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) family); #endif m_freem(m); - return EAFNOSUPPORT; + error = EAFNOSUPPORT; + goto out; } memset(&iphdr, 0, sizeof(iphdr)); @@ -161,7 +172,8 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) iphdr.ip_dst = sin_dst->sin_addr; else { m_freem(m); - return ENETUNREACH; + error = ENETUNREACH; + goto out; } iphdr.ip_p = proto; /* version will be set in ip_output() */ @@ -177,24 +189,34 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) /* XXX Is m_pullup really necessary after M_PREPEND? */ if (m != NULL && M_UNWRITABLE(m, sizeof(struct ip))) m = m_pullup(m, sizeof(struct ip)); - if (m == NULL) - return ENOBUFS; + if (m == NULL) { + error = ENOBUFS; + goto out; + } bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip)); sockaddr_in_init(&u.dst4, &sin_dst->sin_addr, 0); + /* + * XXX future work + * rtcache should be protected somehow (or be removed possibly). + */ if ((rt = rtcache_lookup(&sc->gif_ro, &u.dst)) == NULL) { m_freem(m); - return ENETUNREACH; + error = ENETUNREACH; + goto out; } /* If the route constitutes infinite encapsulation, punt. */ if (rt->rt_ifp == ifp) { rtcache_free(&sc->gif_ro); m_freem(m); - return ENETUNREACH; /*XXX*/ + error = ENETUNREACH; /*XXX*/ + goto out; } error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL); +out: + GIF_READ_UNLOCK(sc); return (error); } @@ -224,18 +246,23 @@ in_gif_input(struct mbuf *m, ...) } #ifndef GIF_ENCAPCHECK struct gif_softc *sc = (struct gif_softc *)gifp->if_softc; + + GIF_READ_LOCK(sc); /* other CPU do delete_tunnel */ if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) { + GIF_READ_UNLOCK(sc); m_freem(m); ip_statinc(IP_STAT_NOGIF); return; } if (!gif_validate4(ip, sc, m->m_pkthdr.rcvif)) { + GIF_READ_UNLOCK(sc); m_freem(m); ip_statinc(IP_STAT_NOGIF); return; } + GIF_READ_UNLOCK(sc); #endif otos = ip->ip_tos; m_adj(m, off); @@ -297,6 +324,8 @@ gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp) struct sockaddr_in *src, *dst; struct in_ifaddr *ia4; + KASSERT(GIF_READ_LOCKED(sc)); + src = satosin(sc->gif_psrc); dst = satosin(sc->gif_pdst); @@ -361,6 +390,8 @@ gif_encapcheck4(struct mbuf *m, int off, int proto, void *arg) /* sanity check done in caller */ sc = arg; + KASSERT(GIF_READ_LOCKED(sc)); + m_copydata(m, 0, sizeof(ip), &ip); ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL; @@ -371,6 +402,9 @@ gif_encapcheck4(struct mbuf *m, int off, int proto, void *arg) int in_gif_attach(struct gif_softc *sc) { + + KASSERT(GIF_WRITE_LOCKED(sc)); + #ifndef GIF_ENCAPCHECK struct sockaddr_in mask4; @@ -397,6 +431,8 @@ in_gif_detach(struct gif_softc *sc) { int error; + KASSERT(GIF_WRITE_LOCKED(sc)); + error = encap_detach(sc->encap_cookie4); if (error == 0) sc->encap_cookie4 = NULL; diff --git a/sys/netinet/ip_encap.c b/sys/netinet/ip_encap.c index c0feb2b..f6bd39f 100644 --- a/sys/netinet/ip_encap.c +++ b/sys/netinet/ip_encap.c @@ -75,6 +75,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.47 2015/12/09 06:00:51 knakahara Exp #ifdef _KERNEL_OPT #include "opt_mrouting.h" #include "opt_inet.h" +#include "opt_net_mpsafe.h" #endif #include @@ -86,6 +87,8 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.47 2015/12/09 06:00:51 knakahara Exp #include #include #include +#include +#include #include #include @@ -110,6 +113,10 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.47 2015/12/09 06:00:51 knakahara Exp #include +#ifdef NET_MPSAFE +#define ENCAP_MPSAFE 1 +#endif + enum direction { INBOUND, OUTBOUND }; #ifdef INET @@ -127,6 +134,28 @@ static void encap_fillarg(struct mbuf *, const struct encaptab *); LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); +#ifdef ENCAP_MPSAFE +/* lock for encap_head list, encaptab list and encap elements */ +static ONCE_DECL(encap_init_control); +static krwlock_t *encaptab_whole_lock; + +static int encap_init_once(void); + +#define ENCAP_WHOLE_READ_LOCK() rw_enter(encaptab_whole_lock,RW_READER) +#define ENCAP_WHOLE_READ_UNLOCK() rw_exit(encaptab_whole_lock) +#define ENCAP_WHOLE_READ_LOCKED() rw_read_held(encaptab_whole_lock) +#define ENCAP_WHOLE_WRITE_LOCK() rw_enter(encaptab_whole_lock,RW_WRITER) +#define ENCAP_WHOLE_WRITE_UNLOCK() rw_exit(encaptab_whole_lock) +#define ENCAP_WHOLE_WRITE_LOCKED() rw_write_held(encaptab_whole_lock) +#else /* !ENCAP_MPSAFE */ +#define ENCAP_WHOLE_READ_LOCK() +#define ENCAP_WHOLE_READ_UNLOCK() +#define ENCAP_WHOLE_READ_LOCKED() (true) +#define ENCAP_WHOLE_WRITE_LOCK() +#define ENCAP_WHOLE_WRITE_UNLOCK() +#define ENCAP_WHOLE_WRITE_LOCKED() (true) +#endif /* !ENCAP_MPSAFE */ + extern int max_keylen; /* radix.c */ struct radix_node_head *encap_head[2]; /* 0 for AF_INET, 1 for AF_INET6 */ @@ -168,13 +197,16 @@ encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir) struct ip_pack4 pack; struct encaptab *ep, *match; int prio, matchprio; - struct radix_node_head *rnh = encap_rnh(AF_INET); + struct radix_node_head *rnh; struct radix_node *rn; KASSERT(m->m_len >= sizeof(*ip)); + KASSERT(ENCAP_WHOLE_READ_LOCKED()); ip = mtod(m, struct ip *); + rnh = encap_rnh(AF_INET); + memset(&pack, 0, sizeof(pack)); pack.p.sp_len = sizeof(pack); pack.mine.sin_family = pack.yours.sin_family = AF_INET; @@ -252,6 +284,7 @@ encap4_input(struct mbuf *m, ...) proto = va_arg(ap, int); va_end(ap); + ENCAP_WHOLE_READ_LOCK(); match = encap4_lookup(m, off, proto, INBOUND); if (match) { @@ -259,11 +292,15 @@ encap4_input(struct mbuf *m, ...) psw = match->psw; if (psw && psw->pr_input) { encap_fillarg(m, match); + ENCAP_WHOLE_READ_UNLOCK(); (*psw->pr_input)(m, off, proto); - } else + } else { + ENCAP_WHOLE_READ_UNLOCK(); m_freem(m); + } return; } + ENCAP_WHOLE_READ_UNLOCK(); /* last resort: inject to raw socket */ rip_input(m, off, proto); @@ -278,13 +315,16 @@ encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir) struct ip_pack6 pack; int prio, matchprio; struct encaptab *ep, *match; - struct radix_node_head *rnh = encap_rnh(AF_INET6); + struct radix_node_head *rnh; struct radix_node *rn; KASSERT(m->m_len >= sizeof(*ip6)); + KASSERT(ENCAP_WHOLE_READ_LOCKED()); ip6 = mtod(m, struct ip6_hdr *); + rnh = encap_rnh(AF_INET6); + memset(&pack, 0, sizeof(pack)); pack.p.sp_len = sizeof(pack); pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6; @@ -336,6 +376,7 @@ encap6_input(struct mbuf **mp, int *offp, int proto) const struct ip6protosw *psw; struct encaptab *match; + ENCAP_WHOLE_READ_LOCK(); match = encap6_lookup(m, *offp, proto, INBOUND); if (match) { @@ -343,12 +384,15 @@ encap6_input(struct mbuf **mp, int *offp, int proto) psw = (const struct ip6protosw *)match->psw; if (psw && psw->pr_input) { encap_fillarg(m, match); + ENCAP_WHOLE_READ_UNLOCK(); return (*psw->pr_input)(mp, offp, proto); } else { + ENCAP_WHOLE_READ_UNLOCK(); m_freem(m); return IPPROTO_DONE; } } + ENCAP_WHOLE_READ_UNLOCK(); /* last resort: inject to raw socket */ return rip6_input(mp, offp, proto); @@ -358,9 +402,13 @@ encap6_input(struct mbuf **mp, int *offp, int proto) static int encap_add(struct encaptab *ep) { - struct radix_node_head *rnh = encap_rnh(ep->af); + struct radix_node_head *rnh; int error = 0; + KASSERT(ENCAP_WHOLE_WRITE_LOCKED()); + + rnh = encap_rnh(ep->af); + LIST_INSERT_HEAD(&encaptab, ep, chain); if (!ep->func && rnh) { if (!rnh->rnh_addaddr((void *)ep->addrpack, @@ -379,9 +427,12 @@ encap_add(struct encaptab *ep) static int encap_remove(struct encaptab *ep) { - struct radix_node_head *rnh = encap_rnh(ep->af); + struct radix_node_head *rnh; int error = 0; + KASSERT(ENCAP_WHOLE_WRITE_LOCKED()); + + rnh = encap_rnh(ep->af); LIST_REMOVE(ep, chain); if (!ep->func && rnh) { if (!rnh->rnh_deladdr((void *)ep->addrpack, @@ -426,6 +477,17 @@ encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp) return 0; } +#ifdef ENCAP_MPSAFE +static int +encap_init_once(void) +{ + + encaptab_whole_lock = rw_obj_alloc(); + + return 0; +} +#endif + /* * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. * length of mask (sm and dm) is assumed to be same as sp/dp. @@ -439,20 +501,29 @@ encap_attach(int af, int proto, { struct encaptab *ep; int error; +#ifndef ENCAP_MPSAFE int s; +#endif size_t l; struct ip_pack4 *pack4; #ifdef INET6 struct ip_pack6 *pack6; #endif +#ifdef ENCAP_MPSAFE + RUN_ONCE(&encap_init_control, encap_init_once); +#endif + +#ifndef ENCAP_MPSAFE s = splsoftnet(); +#endif /* sanity check on args */ error = encap_afcheck(af, sp, dp); if (error) goto fail; /* check if anyone have already attached with exactly same config */ + ENCAP_WHOLE_READ_LOCK(); LIST_FOREACH(ep, &encaptab, chain) { if (ep->af != af) continue; @@ -475,9 +546,11 @@ encap_attach(int af, int proto, memcmp(ep->dstmask, dm, dp->sa_len) != 0) continue; + ENCAP_WHOLE_READ_UNLOCK(); error = EEXIST; goto fail; } + ENCAP_WHOLE_READ_UNLOCK(); switch (af) { case AF_INET: @@ -541,12 +614,16 @@ encap_attach(int af, int proto, ep->psw = psw; ep->arg = arg; + ENCAP_WHOLE_WRITE_LOCK(); error = encap_add(ep); + ENCAP_WHOLE_WRITE_UNLOCK(); if (error) goto gc; error = 0; +#ifndef ENCAP_MPSAFE splx(s); +#endif return ep; gc: @@ -557,7 +634,9 @@ gc: if (ep) kmem_free(ep, sizeof(*ep)); fail: +#ifndef ENCAP_MPSAFE splx(s); +#endif return NULL; } @@ -568,9 +647,17 @@ encap_attach_func(int af, int proto, { struct encaptab *ep; int error; +#ifndef ENCAP_MPSAFE int s; +#endif + +#ifdef ENCAP_MPSAFE + RUN_ONCE(&encap_init_control, encap_init_once); +#endif +#ifndef ENCAP_MPSAFE s = splsoftnet(); +#endif /* sanity check on args */ if (!func) { error = EINVAL; @@ -594,16 +681,22 @@ encap_attach_func(int af, int proto, ep->psw = psw; ep->arg = arg; + ENCAP_WHOLE_WRITE_LOCK(); error = encap_add(ep); + ENCAP_WHOLE_WRITE_UNLOCK(); if (error) goto fail; error = 0; +#ifndef ENCAP_MPSAFE splx(s); +#endif return ep; fail: +#ifndef ENCAP_MPSAFE splx(s); +#endif return NULL; } @@ -650,9 +743,11 @@ encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) /* * Check to see if we have a valid encap configuration. */ + ENCAP_WHOLE_READ_LOCK(); match = encap6_lookup(m, off, nxt, OUTBOUND); if (match) valid++; + ENCAP_WHOLE_READ_UNLOCK(); /* * Depending on the value of "valid" and routing table @@ -670,6 +765,7 @@ encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) } /* inform all listeners */ + ENCAP_WHOLE_READ_LOCK(); LIST_FOREACH(ep, &encaptab, chain) { if (ep->af != AF_INET6) continue; @@ -680,9 +776,11 @@ encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) /* XXX need to pass ep->arg or ep itself to listeners */ psw = (const struct ip6protosw *)ep->psw; - if (psw && psw->pr_ctlinput) + if (psw && psw->pr_ctlinput) { (*psw->pr_ctlinput)(cmd, sa, d); + } } + ENCAP_WHOLE_READ_UNLOCK(); rip6_ctlinput(cmd, sa, d0); return NULL; @@ -694,23 +792,26 @@ encap_detach(const struct encaptab *cookie) { const struct encaptab *ep = cookie; struct encaptab *p, *np; - int error; + int error = ENOENT; + ENCAP_WHOLE_WRITE_LOCK(); LIST_FOREACH_SAFE(p, &encaptab, chain, np) { if (p == ep) { error = encap_remove(p); if (error) - return error; + goto out; if (!ep->func) { kmem_free(p->addrpack, ep->addrpack->sa_len); kmem_free(p->maskpack, ep->maskpack->sa_len); } kmem_free(p, sizeof(*p)); /*XXX*/ - return 0; + goto out; } } - return ENOENT; +out: + ENCAP_WHOLE_WRITE_UNLOCK(); + return error; } static struct radix_node_head * @@ -752,6 +853,8 @@ encap_fillarg(struct mbuf *m, const struct encaptab *ep) { struct m_tag *mtag; + KASSERT(ENCAP_WHOLE_READ_LOCKED()); + mtag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT); if (mtag) { *(void **)(mtag + 1) = ep->arg; @@ -773,3 +876,14 @@ encap_getarg(struct mbuf *m) } return p; } + +/* + * Locking notes: + * - Updates of encaptab list are serialized by encaptab_whole_lock (a rwlock) + * - Items of encaptab list are also protected by encaptab_whole_lock + * - encap_head[2] are also protected by encaptab_whole_lock + * + * Note: + * The struct ip{,6}protosw instances are statically defined, + * so encap{,6}_input() can call (*psw->pr_input)() without lock. + */ diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index 302e855..187b0e5 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -78,6 +78,7 @@ static int gif_validate6(const struct ip6_hdr *, struct gif_softc *, int ip6_gif_hlim = GIF_HLIM; extern LIST_HEAD(, gif_softc) gif_softc_list; +extern krwlock_t *gif_softc_list_lock; extern const struct ip6protosw in6_gif_protosw; @@ -90,8 +91,8 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) { struct rtentry *rt; struct gif_softc *sc = ifp->if_softc; - struct sockaddr_in6 *sin6_src = satosin6(sc->gif_psrc); - struct sockaddr_in6 *sin6_dst = satosin6(sc->gif_pdst); + struct sockaddr_in6 *sin6_src; + struct sockaddr_in6 *sin6_dst; struct ip6_hdr *ip6; int proto, error; u_int8_t itos, otos; @@ -100,11 +101,17 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) struct sockaddr_in6 dst6; } u; + GIF_READ_LOCK(sc); + + sin6_src = satosin6(sc->gif_psrc); + sin6_dst = satosin6(sc->gif_pdst); + if (sin6_src == NULL || sin6_dst == NULL || sin6_src->sin6_family != AF_INET6 || sin6_dst->sin6_family != AF_INET6) { m_freem(m); - return EAFNOSUPPORT; + error = EAFNOSUPPORT; + goto out; } switch (family) { @@ -116,8 +123,10 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) proto = IPPROTO_IPV4; if (m->m_len < sizeof(*ip)) { m = m_pullup(m, sizeof(*ip)); - if (!m) - return ENOBUFS; + if (!m) { + error = ENOBUFS; + goto out; + } } ip = mtod(m, struct ip *); itos = ip->ip_tos; @@ -130,8 +139,10 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) proto = IPPROTO_IPV6; if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); - if (!m) - return ENOBUFS; + if (!m) { + error = ENOBUFS; + goto out; + } } ip6 = mtod(m, struct ip6_hdr *); itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; @@ -144,15 +155,18 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) family); #endif m_freem(m); - return EAFNOSUPPORT; + error = EAFNOSUPPORT; + goto out; } /* prepend new IP header */ M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); if (m && m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); - if (m == NULL) - return ENOBUFS; + if (m == NULL) { + error = ENOBUFS; + goto out; + } ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; @@ -169,7 +183,8 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) ip6->ip6_dst = sin6_dst->sin6_addr; else { m_freem(m); - return ENETUNREACH; + error = ENETUNREACH; + goto out; } if (ifp->if_flags & IFF_LINK1) ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); @@ -179,16 +194,22 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) ip6->ip6_flow |= htonl((u_int32_t)otos << 20); sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0); + /* + * XXX future work + * rtcache should be protected somehow (or be removed possibly). + */ if ((rt = rtcache_lookup(&sc->gif_ro, &u.dst)) == NULL) { m_freem(m); - return ENETUNREACH; + error = ENETUNREACH; + goto out; } /* If the route constitutes infinite encapsulation, punt. */ if (rt->rt_ifp == ifp) { rtcache_free(&sc->gif_ro); m_freem(m); - return ENETUNREACH; /* XXX */ + error = ENETUNREACH; /* XXX */ + goto out; } #ifdef IPV6_MINMTU @@ -202,6 +223,8 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) error = ip6_output(m, 0, &sc->gif_ro, 0, NULL, NULL, NULL); #endif +out: + GIF_READ_UNLOCK(sc); return (error); } @@ -225,18 +248,22 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) } #ifndef GIF_ENCAPCHECK struct gif_softc *sc = (struct gif_softc *)gifp->if_softc; + GIF_READ_LOCK(sc); /* other CPU do delete_tunnel */ if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) { + GIF_READ_UNLOCK(sc); m_freem(m); IP6_STATINC(IP6_STAT_NOGIF); return IPPROTO_DONE; } if (!gif_validate6(ip6, sc, m->m_pkthdr.rcvif)) { + GIF_READ_UNLOCK(sc); m_freem(m); IP6_STATINC(IP6_STAT_NOGIF); return IPPROTO_DONE; } + GIF_READ_UNLOCK(sc); #endif otos = ip6->ip6_flow; @@ -300,6 +327,8 @@ gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc, { const struct sockaddr_in6 *src, *dst; + KASSERT(GIF_READ_LOCKED(sc)); + src = satosin6(sc->gif_psrc); dst = satosin6(sc->gif_pdst); @@ -352,6 +381,8 @@ gif_encapcheck6(struct mbuf *m, int off, int proto, void *arg) /* sanity check done in caller */ sc = arg; + KASSERT(GIF_READ_LOCKED(sc)); + m_copydata(m, 0, sizeof(ip6), (void *)&ip6); ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL; @@ -362,6 +393,9 @@ gif_encapcheck6(struct mbuf *m, int off, int proto, void *arg) int in6_gif_attach(struct gif_softc *sc) { + + KASSERT(GIF_WRITE_LOCKED(sc)); + #ifndef GIF_ENCAPCHECK struct sockaddr_in6 mask6; @@ -389,6 +423,8 @@ in6_gif_detach(struct gif_softc *sc) { int error; + KASSERT(GIF_WRITE_LOCKED(sc)); + error = encap_detach(sc->encap_cookie6); if (error == 0) sc->encap_cookie6 = NULL; @@ -433,19 +469,37 @@ in6_gif_ctlinput(int cmd, const struct sockaddr *sa, void *d) * XXX slow. sc (or sc->encap_cookie6) should be passed from * ip_encap.c. */ + + GIF_LIST_READ_LOCK(); LIST_FOREACH(sc, &gif_softc_list, gif_list) { - if ((sc->gif_if.if_flags & IFF_RUNNING) == 0) + /* + * To avoid gif_ro corruption which is being read by + * in6_gif_output(), write lock is required here. + */ + GIF_WRITE_LOCK(sc); + if ((sc->gif_if.if_flags & IFF_RUNNING) == 0) { + GIF_WRITE_UNLOCK(sc); continue; - if (sc->gif_psrc->sa_family != AF_INET6) + } + if (sc->gif_psrc->sa_family != AF_INET6) { + GIF_WRITE_UNLOCK(sc); continue; + } + /* + * XXX future work + * rtcache should be protected somehow (or be removed possibly). + */ dst6 = satocsin6(rtcache_getdst(&sc->gif_ro)); /* XXX scope */ if (dst6 == NULL) ; else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr)) rtcache_free(&sc->gif_ro); + + GIF_WRITE_UNLOCK(sc); } + GIF_LIST_READ_UNLOCK(); return NULL; }