diff --git a/sys/compat/common/if_43.c b/sys/compat/common/if_43.c index 0200c5a..86d9922 100644 --- a/sys/compat/common/if_43.c +++ b/sys/compat/common/if_43.c @@ -216,7 +216,7 @@ compat_ifioctl(struct socket *so, u_long ocmd, u_long cmd, void *data, { int error; struct ifreq *ifr = (struct ifreq *)data; - struct ifnet *ifp = ifunit(ifr->ifr_name); + struct ifnet *ifp = ifget(ifr->ifr_name); struct sockaddr *sa; if (ifp == NULL) @@ -265,5 +265,6 @@ compat_ifioctl(struct socket *so, u_long ocmd, u_long cmd, void *data, *(u_int16_t *)&ifr->ifr_addr = ((struct sockaddr *)&ifr->ifr_addr)->sa_family; } + ifput(ifp); return error; } diff --git a/sys/compat/common/uipc_syscalls_50.c b/sys/compat/common/uipc_syscalls_50.c index 116b717..36c060b 100644 --- a/sys/compat/common/uipc_syscalls_50.c +++ b/sys/compat/common/uipc_syscalls_50.c @@ -62,16 +62,16 @@ compat_ifdatareq(struct lwp *l, u_long cmd, void *data) { struct oifdatareq *ifdr = data; struct ifnet *ifp; - int error; + int error = 0; - ifp = ifunit(ifdr->ifdr_name); + ifp = ifget(ifdr->ifdr_name); if (ifp == NULL) return ENXIO; switch (cmd) { case SIOCGIFDATA: ifdatan2o(&ifdr->ifdr_data, &ifp->if_data); - return 0; + break; case SIOCZIFDATA: if (l != NULL) { @@ -80,7 +80,7 @@ compat_ifdatareq(struct lwp *l, u_long cmd, void *data) KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, NULL); if (error != 0) - return error; + goto out; } ifdatan2o(&ifdr->ifdr_data, &ifp->if_data); /* @@ -89,10 +89,13 @@ compat_ifdatareq(struct lwp *l, u_long cmd, void *data) */ memset(&ifp->if_data.ifi_ipackets, 0, sizeof(ifp->if_data) - offsetof(struct if_data, ifi_ipackets)); - return 0; + break; default: - return EINVAL; + error = EINVAL; } +out: + ifput(ifp); + return error; } #endif diff --git a/sys/net/agr/if_agr.c b/sys/net/agr/if_agr.c index ba70493..747bd3f 100644 --- a/sys/net/agr/if_agr.c +++ b/sys/net/agr/if_agr.c @@ -457,7 +457,7 @@ agr_setconfig(struct agr_softc *sc, const struct agrreq *ar) if (error) { return error; } - ifp_port = ifunit(ifname); + ifp_port = ifget(ifname); if (ifp_port == NULL) { return ENOENT; } @@ -477,6 +477,7 @@ agr_setconfig(struct agr_softc *sc, const struct agrreq *ar) break; } agr_ports_unlock(sc); + ifput(ifp_port); return error; } diff --git a/sys/net/if.c b/sys/net/if.c index 8c86414..8f28a22 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -198,9 +198,33 @@ int ifqmaxlen = IFQ_MAXLEN; * - Some critical sections of IFNET_FOREACH are blockable/sleepable. * We have to restructure them and put bock/sleep operations (e.g., copyout) * out of them to following pserialize's restrictions. - * - ifnet object reference - * - They can be held not only via IFNET_FOREACH but also via ifunit - * and a pointer embedded to another object (e.g., rtentry) + */ + +/* + * ifget/ifput - ifnet object reference + * A replacement of ifunit API. ifget returns an ifnet object as same as ifunit, + * but additionally it takes a reference to the ifnet object. The kernel won't + * free the ifnet object until there is no reference to it. ifput needs to be + * called when finished working on the ifnet object to release it. ifget may + * return NULL when a target interface is absent or being destroyed. + * + * + * Typical usage is like this: + * + * ifp = ifget(name); + * if (ifp == NULL) + * return ENXIO; + * // do something on ifp + * ifput(ifp); + * + * It uses an adaptive lock so it cannot be used in hardware interrupt. + * + * Note that ifget needs no locking, however, ifput needs locking + * to call cv_wait safely. It's good to remove the locking somehow. + * + * TODO: + * - We have to deal with ifnet pointers embedded to other objects, e.g., rtentry. + * An embedded pointer of ifnet can be expired without knowing by its host object. */ static int if_rt_walktree(struct rtentry *, void *); @@ -681,6 +705,8 @@ if_attach(ifnet_t *ifp) ifp->if_capenable = 0; ifp->if_csum_flags_tx = 0; ifp->if_csum_flags_rx = 0; + ifp->if_dying = false; + refcount_init(&ifp->if_refcount, "ifp"); #ifdef ALTQ ifp->if_snd.altq_type = 0; @@ -820,8 +846,11 @@ if_detach(struct ifnet *ifp) TAILQ_REMOVE(&ifnet_list, ifp, if_list); pserialize_perform(ifnet_psz); + refcount_wait(ifp->if_refcount, &ifnet_mtx); IFNET_UNLOCK(); + refcount_destroy(ifp->if_refcount); + /* * Do an if_down() to give protocols a chance to do something. */ @@ -1070,7 +1099,11 @@ if_clone_destroy(const char *name) if (ifc == NULL) return EINVAL; + IFNET_LOCK(); ifp = ifunit(name); + if (ifp != NULL) + ifp->if_dying = true; + IFNET_UNLOCK(); if (ifp == NULL) return ENXIO; @@ -1663,7 +1696,7 @@ ifpromisc(struct ifnet *ifp, int pswitch) struct ifnet * ifunit(const char *name) { - struct ifnet *ifp; + struct ifnet *ifp = NULL; const char *cp = name; u_int unit = 0; u_int i; @@ -1680,20 +1713,48 @@ ifunit(const char *name) */ if (i == IFNAMSIZ || (cp != name && *cp == '\0')) { if (unit >= if_indexlim) - return NULL; + goto out; ifp = ifindex2ifnet[unit]; - if (ifp == NULL || ifp->if_output == if_nulloutput) - return NULL; - return ifp; + if (ifp == NULL || ifp->if_dying || + ifp->if_output == if_nulloutput) + ifp = NULL; + goto out; } IFNET_FOREACH(ifp) { - if (ifp->if_output == if_nulloutput) + if (ifp->if_dying || ifp->if_output == if_nulloutput) continue; if (strcmp(ifp->if_xname, name) == 0) - return ifp; + goto out; + } + ifp = NULL; +out: + return ifp; +} + +struct ifnet * +ifget(const char *name) +{ + struct ifnet *ifp; + int s; + + IFNET_RENTER(s); + ifp = ifunit(name); + if (ifp != NULL) + refcount_hold(ifp->if_refcount); + IFNET_REXIT(s); + + return ifp; +} + +void +ifput(struct ifnet *ifp) +{ + if (ifp != NULL) { + IFNET_LOCK(); + refcount_release(ifp->if_refcount); + IFNET_UNLOCK(); } - return NULL; } ifnet_t * @@ -1990,7 +2051,7 @@ doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) #endif ifr = data; - ifp = ifunit(ifr->ifr_name); + ifp = ifget(ifr->ifr_name); switch (cmd) { case SIOCIFCREATE: @@ -2000,9 +2061,12 @@ doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, NULL); - if (error != 0) + if (error != 0) { + ifput(ifp); return error; + } } + ifput(ifp); mutex_enter(&if_clone_mtx); r = (cmd == SIOCIFCREATE) ? if_clone_create(ifr->ifr_name) : @@ -2011,7 +2075,9 @@ doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) return r; case SIOCIFGCLONERS: - return if_clone_list((struct if_clonereq *)data); + r = if_clone_list((struct if_clonereq *)data); + ifput(ifp); + return r; } if (ifp == NULL) @@ -2049,8 +2115,10 @@ doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, NULL); - if (error != 0) + if (error != 0) { + ifput(ifp); return error; + } } } @@ -2086,6 +2154,7 @@ doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) #endif ifnet_lock_exit(ifp->if_ioctl_lock); + ifput(ifp); return error; } diff --git a/sys/net/if.h b/sys/net/if.h index 97c4f1b..fed2c0e 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -210,11 +210,14 @@ struct ifqueue { }; struct ifnet_lock; +struct refcount; #ifdef _KERNEL #include #include +#include #include +#include struct ifnet_lock { kmutex_t il_lock; /* Protects the critical section. */ @@ -234,6 +237,65 @@ struct ifnet_lock { * before they leave. */ }; + +struct refcount { + kcondvar_t rc_cv; + unsigned int rc_refs; + bool rc_waiting; +}; + +static inline void +refcount_init(struct refcount **rc, const char *wmesg) +{ + struct refcount *rcp; + + rcp = kmem_alloc(sizeof(**rc), KM_SLEEP); + KASSERT(rcp != NULL); + + cv_init(&rcp->rc_cv, wmesg); + rcp->rc_refs = 0; + rcp->rc_waiting = false; + + *rc = rcp; +} + +static inline void +refcount_destroy(struct refcount *rcp) +{ + cv_destroy(&rcp->rc_cv); + kmem_free(rcp, sizeof(*rcp)); +} + +static inline void +refcount_hold(struct refcount *rc) +{ + atomic_inc_uint(&rc->rc_refs); +} + +static inline void +refcount_release(struct refcount *rc) +{ + if (__predict_false(atomic_dec_uint_nv(&rc->rc_refs) == 0 && + rc->rc_waiting)) + cv_broadcast(&rc->rc_cv); +} + +static inline int +refcount_refs(struct refcount *rc) +{ + return rc->rc_refs; +} + +static inline void +refcount_wait(struct refcount *rc, kmutex_t *lock) +{ + rc->rc_waiting = true; + membar_sync(); + while (__predict_false(rc->rc_refs > 0)) { + cv_wait(&rc->rc_cv, lock); + } + rc->rc_waiting = false; +} #endif /* _KERNEL */ /* @@ -342,6 +404,8 @@ typedef struct ifnet { const struct sockaddr *); int (*if_setflags)(struct ifnet *, const short); struct ifnet_lock *if_ioctl_lock; + struct refcount *if_refcount; + bool if_dying; } ifnet_t; #define if_mtu if_data.ifi_mtu @@ -878,6 +942,8 @@ extern int (*ifioctl)(struct socket *, u_long, void *, struct lwp *); int ifioctl_common(struct ifnet *, u_long, void *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); +struct ifnet *ifget(const char *); +void ifput(struct ifnet *ifp); int if_addr_init(ifnet_t *, struct ifaddr *, bool); int if_mcast_op(ifnet_t *, const unsigned long, const struct sockaddr *); int if_flags_set(struct ifnet *, const short); diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 1ea2169..8decea7 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -784,29 +784,41 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) struct ifnet *ifs; int error = 0; - ifs = ifunit(req->ifbr_ifsname); + ifs = ifget(req->ifbr_ifsname); if (ifs == NULL) - return (ENOENT); + return ENOENT; - if (sc->sc_if.if_mtu != ifs->if_mtu) - return (EINVAL); + if (sc->sc_if.if_mtu != ifs->if_mtu) { + error = EINVAL; + goto out; + } - if (ifs->if_bridge == sc) - return (EEXIST); + if (ifs->if_bridge == sc) { + error = EEXIST; + goto out; + } - if (ifs->if_bridge != NULL) - return (EBUSY); + if (ifs->if_bridge != NULL) { + error = EBUSY; + goto out; + } - if (ifs->if_input != ether_input) - return EINVAL; + if (ifs->if_input != ether_input) { + error = EINVAL; + goto out; + } /* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */ - if ((ifs->if_flags & IFF_SIMPLEX) == 0) - return EINVAL; + if ((ifs->if_flags & IFF_SIMPLEX) == 0) { + error = EINVAL; + goto out; + } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT); - if (bif == NULL) - return (ENOMEM); + if (bif == NULL) { + error = ENOMEM; + goto out; + } switch (ifs->if_type) { case IFT_ETHER: @@ -848,7 +860,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) if (bif != NULL) free(bif, M_DEVBUF); } - return (error); + ifput(ifs); + return error; } static int diff --git a/sys/net/if_pppoe.c b/sys/net/if_pppoe.c index 8e6f246..b768c94 100644 --- a/sys/net/if_pppoe.c +++ b/sys/net/if_pppoe.c @@ -884,11 +884,16 @@ pppoe_ioctl(struct ifnet *ifp, unsigned long cmd, void *data) if (parms->eth_ifname[0] != 0) { struct ifnet *eth_if; - eth_if = ifunit(parms->eth_ifname); - if (eth_if == NULL || eth_if->if_dlt != DLT_EN10MB) { + eth_if = ifget(parms->eth_ifname); + if (eth_if == NULL) { sc->sc_eth_if = NULL; return ENXIO; } + if (eth_if->if_dlt != DLT_EN10MB) { + sc->sc_eth_if = NULL; + ifput(eth_if); + return ENXIO; + } if (sc->sc_sppp.pp_if.if_mtu != eth_if->if_mtu - PPPOE_OVERHEAD) { @@ -896,6 +901,7 @@ pppoe_ioctl(struct ifnet *ifp, unsigned long cmd, void *data) PPPOE_OVERHEAD; } sc->sc_eth_if = eth_if; + ifput(eth_if); } if (parms->ac_name != NULL) { size_t s; diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 9610895..016d4a7 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -513,11 +513,13 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, void *data) error = EINVAL; /* check for valid tag */ break; } - if ((pr = ifunit(vlr.vlr_parent)) == 0) { + if ((pr = ifget(vlr.vlr_parent)) == NULL) { error = ENOENT; break; } - if ((error = vlan_config(ifv, pr)) != 0) + error = vlan_config(ifv, pr); + ifput(pr); + if (error != 0) break; ifv->ifv_tag = vlr.vlr_tag; ifp->if_flags |= IFF_RUNNING; diff --git a/sys/net/npf/npf_if.c b/sys/net/npf/npf_if.c index 36c9725..0052e08 100644 --- a/sys/net/npf/npf_if.c +++ b/sys/net/npf/npf_if.c @@ -123,8 +123,9 @@ npf_ifmap_register(const char *ifname) strlcpy(nim->n_ifname, ifname, IFNAMSIZ); KERNEL_LOCK(1, NULL); - if ((ifp = ifunit(ifname)) != NULL) { + if ((ifp = ifget(ifname)) != NULL) { ifp->if_pf_kif = (void *)(uintptr_t)i; + ifput(ifp); } KERNEL_UNLOCK_ONE(NULL); out: diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index e2b4ee2..df020d0 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -1976,10 +1976,12 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, void *data) break; error = 1; if (carpr.carpr_carpdev[0] != '\0' && - (cdev = ifunit(carpr.carpr_carpdev)) == NULL) - return (EINVAL); - if ((error = carp_set_ifp(sc, cdev))) - return (error); + (cdev = ifget(carpr.carpr_carpdev)) == NULL) + return EINVAL; + error = carp_set_ifp(sc, cdev); + ifput(cdev); + if (error) + return error; if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { switch (carpr.carpr_state) { case BACKUP: diff --git a/sys/netnatm/natm.c b/sys/netnatm/natm.c index 99445e7..bec37b8 100644 --- a/sys/netnatm/natm.c +++ b/sys/netnatm/natm.c @@ -154,11 +154,15 @@ natm_connect(struct socket *so, struct mbuf *nam, struct lwp *l) * convert interface string to ifp, validate. */ - ifp = ifunit(snatm->snatm_if); - if (ifp == NULL || (ifp->if_flags & IFF_RUNNING) == 0) { + ifp = ifget(snatm->snatm_if); + if (ifp == NULL) + return ENXIO; + if (ifp->if_flags & IFF_RUNNING == 0) { + ifput(ifp); return ENXIO; } if (ifp->if_output != atm_output) { + ifput(ifp); return EAFNOSUPPORT; } @@ -166,8 +170,10 @@ natm_connect(struct socket *so, struct mbuf *nam, struct lwp *l) * register us with the NATM PCB layer */ - if (npcb_add(npcb, ifp, snatm->snatm_vci, snatm->snatm_vpi) != npcb) + if (npcb_add(npcb, ifp, snatm->snatm_vci, snatm->snatm_vpi) != npcb) { + ifput(ifp); return EADDRINUSE; + } /* * enable rx @@ -184,6 +190,7 @@ natm_connect(struct socket *so, struct mbuf *nam, struct lwp *l) return EIO; } splx(s2); + ifput(ifp); soisconnected(so); return error;