diff --git a/sys/dist/pf/net/pf.c b/sys/dist/pf/net/pf.c index d8168d9..6ab14e9 100644 --- a/sys/dist/pf/net/pf.c +++ b/sys/dist/pf/net/pf.c @@ -2980,6 +2980,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) if ((rt = rtcache_init_noclone(rop)) != NULL) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); mss = max(tcp_mssdflt, mss); + rtcache_unref(rt, rop); } rtcache_free(rop); #endif @@ -5068,6 +5069,7 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) } u; struct route ro; int ret = 1; + struct rtentry *rt; bzero(&ro, sizeof(ro)); switch (af) { @@ -5084,7 +5086,10 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) } rtcache_setdst(&ro, &u.dst); - ret = rtcache_init(&ro) != NULL ? 1 : 0; + rt = rtcache_init(&ro); + ret = rt != NULL ? 1 : 0; + if (rt != NULL) + rtcache_unref(rt, &ro); rtcache_free(&ro); return (ret); @@ -5300,6 +5305,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (rt->rt_flags & RTF_GATEWAY) dst = rt->rt_gateway; + rtcache_unref(rt, ro); /* FIXME dst is NOMPSAFE */ } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, diff --git a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c index c6df2a9b..7ae49fd 100644 --- a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c +++ b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c @@ -1330,6 +1330,7 @@ done: softc->ipf_frouteok[1]++; # if __NetBSD_Version__ >= 499001100 + rtcache_unref(rt, ro); rtcache_free(ro); # else if (rt) { @@ -1467,6 +1468,7 @@ ipf_fastroute6(struct mbuf *m0, struct mbuf **mpp, fr_info_t *fin, } bad: # if __NetBSD_Version__ >= 499001100 + rtcache_unref(rt, ro); rtcache_free(ro); # else if (ro->ro_rt != NULL) { @@ -1501,6 +1503,7 @@ ipf_verifysrc(fr_info_t *fin) rc = 0; else rc = (fin->fin_ifp == rt->rt_ifp); + rtcache_unref(rt, &iproute); rtcache_free(&iproute); #else dst = (struct sockaddr_in *)&iproute.ro_dst; diff --git 
a/sys/net/if.c b/sys/net/if.c index ebef3a1..9912e5a 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1586,6 +1586,7 @@ ifa_psref_init(struct ifaddr *ifa) void ifaref(struct ifaddr *ifa) { + KASSERT(!ISSET(ifa->ifa_flags, IFA_DESTROYING)); ifa->ifa_refcnt++; } @@ -1600,6 +1601,13 @@ ifafree(struct ifaddr *ifa) } } +bool +ifa_is_destroying(struct ifaddr *ifa) +{ + + return ISSET(ifa->ifa_flags, IFA_DESTROYING); +} + void ifa_insert(struct ifnet *ifp, struct ifaddr *ifa) { diff --git a/sys/net/if.h b/sys/net/if.h index 0599d1e..01fd721 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -602,6 +602,7 @@ struct ifaddr { #endif }; #define IFA_ROUTE RTF_UP /* (0x01) route installed */ +#define IFA_DESTROYING 0x2 /* * Message format for use in obtaining information about interfaces from @@ -996,6 +997,7 @@ void ifa_psref_init(struct ifaddr *); void ifa_acquire(struct ifaddr *, struct psref *); void ifa_release(struct ifaddr *, struct psref *); bool ifa_held(struct ifaddr *); +bool ifa_is_destroying(struct ifaddr *); void ifaref(struct ifaddr *); void ifafree(struct ifaddr *); diff --git a/sys/net/if_faith.c b/sys/net/if_faith.c index 25d44a9..63d963b 100644 --- a/sys/net/if_faith.c +++ b/sys/net/if_faith.c @@ -322,7 +322,7 @@ faithprefix(struct in6_addr *in6) else ret = 0; if (rt) - rtfree(rt); + rt_unref(rt); return ret; } #endif diff --git a/sys/net/if_mpls.c b/sys/net/if_mpls.c index bac784f..1db8227 100644 --- a/sys/net/if_mpls.c +++ b/sys/net/if_mpls.c @@ -304,7 +304,7 @@ mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, } err = mpls_send_frame(m, rt1->rt_ifp, rt); - rtfree(rt1); + rt_unref(rt1); return err; } @@ -481,7 +481,7 @@ done: if (error != 0 && m != NULL) m_freem(m); if (rt != NULL) - rtfree(rt); + rt_unref(rt); return error; } diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 0afbdbc..55a0abd 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -446,11 +446,13 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct 
sockaddr *dst, /* If the route constitutes infinite encapsulation, punt. */ if (rt->rt_ifp == ifp) { + rtcache_unref(rt, &sc->sc_ro); rtcache_free(&sc->sc_ro); m_freem(m); ifp->if_oerrors++; return ENETUNREACH; } + rtcache_unref(rt, &sc->sc_ro); ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len - sizeof(struct ip); @@ -533,10 +535,10 @@ stf_checkaddr4(struct stf_softc *sc, const struct in_addr *in, (uint32_t)ntohl(sin.sin_addr.s_addr)); #endif if (rt) - rtfree(rt); + rt_unref(rt); return -1; } - rtfree(rt); + rt_unref(rt); } return 0; diff --git a/sys/net/radix.c b/sys/net/radix.c index 7bfafec..bcb5fb7 100644 --- a/sys/net/radix.c +++ b/sys/net/radix.c @@ -1053,7 +1053,8 @@ rn_delayedinit(void **head, int off) { struct delayinit *di; - KASSERT(radix_initialized == 0); + if (radix_initialized) + return; di = kmem_alloc(sizeof(*di), KM_SLEEP); di->head = head; diff --git a/sys/net/route.c b/sys/net/route.c index 9cbeeaf..b32af25 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -117,6 +117,9 @@ __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.182 2016/11/15 01:50:06 ozaki-r Exp $"); #include #include #include +#include +#include +#include #include #include @@ -131,6 +134,13 @@ __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.182 2016/11/15 01:50:06 ozaki-r Exp $"); #define rtcache_debug() 0 #endif /* RTFLUSH_DEBUG */ +#ifdef RT_DEBUG +#define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ + __func__, __LINE__, (rt), (rt)->rt_refcnt) +#else +#define RT_REFCNT_TRACE(rt) do {} while (0) +#endif + struct rtstat rtstat; static int rttrash; /* routes not in table but not freed */ @@ -147,6 +157,105 @@ static void rt_timer_queue_remove_all(struct rttimer_queue *); static void rt_timer_remove_all(struct rtentry *); static void rt_timer_timer(void *); +/* + * Locking notes: + * - The routing table is protected by a global rwlock + * - API: RT_RLOCK and friends + * - rtcaches are protected by a global rwlock + * - API: RTCACHE_RLOCK and friends + * - References to a rtentry 
are managed by reference counting and psref + * - Reference counting is used for temporary references when a rtentry + * is fetched from the routing table + * - psref is used for temporary references when a rtentry is fetched + * from a rtcache + * - struct route (rtcache) has struct psref, so we cannot obtain + * a reference twice on the same struct route + * - Before destroying or updating a rtentry, we have to wait for + * all references to be released (see below for details) + * - APIs + * - An obtained rtentry via rtalloc1 or rtrequest* must be + * unreferenced by rt_unref + * - An obtained rtentry via rtcache_* must be unreferenced by + * rtcache_unref + * - TODO: once we get a lockless routing table, we should use only + * psref for rtentries + * - rtentry destruction + * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) + * - If a caller of rtrequest grabs a reference of a rtentry, the caller + * has a responsibility to destroy the rtentry by itself by calling + * rt_free + * - If not, rtrequest itself does that + * - If rt_free is called in softint, the actual destruction routine is + * deferred to a workqueue + * - rtentry update + * - When updating a rtentry, RTF_UPDATING flag is set + * - If a rtentry has RTF_UPDATING set, fetching the rtentry from + * the routing table or a rtcache results in either of the following + * cases: + * - if the caller runs in softint, the caller fails to fetch + * - otherwise, the caller waits for the update to complete and retries + * the fetch (which will probably succeed the second time) + */ + +/* + * Global locks for the routing table and rtcaches. 
+ * Locking order: rtcache_lock => rt_lock + */ +static krwlock_t rt_lock __cacheline_aligned; +#ifdef NET_MPSAFE +#define RT_RLOCK() rw_enter(&rt_lock, RW_READER) +#define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) +#define RT_UNLOCK() rw_exit(&rt_lock) +#define RT_LOCKED() rw_lock_held(&rt_lock) +#define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) +#else +#define RT_RLOCK() do {} while (0) +#define RT_WLOCK() do {} while (0) +#define RT_UNLOCK() do {} while (0) +#define RT_LOCKED() false +#define RT_ASSERT_WLOCK() do {} while (0) +#endif + +static krwlock_t rtcache_lock __cacheline_aligned; +#ifdef NET_MPSAFE +#define RTCACHE_RLOCK() rw_enter(&rtcache_lock, RW_READER) +#define RTCACHE_WLOCK() rw_enter(&rtcache_lock, RW_WRITER) +#define RTCACHE_UNLOCK() rw_exit(&rtcache_lock) +#define RTCACHE_ASSERT_WLOCK() KASSERT(rw_write_held(&rtcache_lock)) +#define RTCACHE_WLOCKED() rw_write_held(&rtcache_lock) +#else +#define RTCACHE_RLOCK() do {} while (0) +#define RTCACHE_WLOCK() do {} while (0) +#define RTCACHE_UNLOCK() do {} while (0) +#define RTCACHE_ASSERT_WLOCK() do {} while (0) +#define RTCACHE_WLOCKED() false +#endif + +/* + * mutex and cv that are used to wait for references to a rtentry left + * before updating the rtentry. + */ +static struct { + kmutex_t lock; + kcondvar_t cv; + bool ongoing; + const struct lwp *lwp; +} rt_update_global __cacheline_aligned; + +/* + * A workqueue and stuff that are used to defer the destruction routine + * of rtentries. 
+ */ +static struct { + struct workqueue *wq; + struct work wk; + kmutex_t lock; + struct rtentry *queue[10]; +} rt_free_global __cacheline_aligned; + +/* psref for rtentry */ +static struct psref_class *rt_psref_class __read_mostly; + #ifdef RTFLUSH_DEBUG static int _rtcache_debug = 0; #endif /* RTFLUSH_DEBUG */ @@ -163,6 +272,23 @@ static void rtcache_clear(struct route *); static void rtcache_clear_rtentry(int, struct rtentry *); static void rtcache_invalidate(struct dom_rtlist *); +static void rt_ref(struct rtentry *); + +static struct rtentry * + rtalloc1_locked(const struct sockaddr *, int, bool); +static struct rtentry * + rtcache_validate_locked(struct route *); +static void rtcache_free_locked(struct route *); +static int rtcache_setdst_locked(struct route *, const struct sockaddr *); + +static void rtcache_ref(struct rtentry *, struct route *); + +static void rt_update_wait(void); + +static bool rt_wait_ok(void); +static void rt_wait_refcnt(const char *, struct rtentry *, int); +static void rt_wait_psref(struct rtentry *); + #ifdef DDB static void db_print_sa(const struct sockaddr *); static void db_print_ifa(struct ifaddr *); @@ -320,14 +446,28 @@ route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, return result; } +static void rt_free_work(struct work *, void *); + void rt_init(void) { + int error; #ifdef RTFLUSH_DEBUG sysctl_net_rtcache_setup(NULL); #endif + mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); + rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); + + error = workqueue_create(&rt_free_global.wq, "rt_free", + rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); + if (error) + panic("%s: workqueue_create failed (%d)\n", __func__, error); + + mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); + cv_init(&rt_update_global.cv, "rt_update"); + pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", NULL, IPL_SOFTNET); pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 
0, "rttmrpl", @@ -351,7 +491,9 @@ rtflushall(int family) if ((dom = pffinddomain(family)) == NULL) return; + RTCACHE_WLOCK(); rtcache_invalidate(&dom->dom_rtcache); + RTCACHE_UNLOCK(); } static void @@ -359,6 +501,8 @@ rtcache(struct route *ro) { struct domain *dom; + RTCACHE_ASSERT_WLOCK(); + rtcache_invariants(ro); KASSERT(ro->_ro_rt != NULL); KASSERT(ro->ro_invalid == false); @@ -406,12 +550,13 @@ dump_rt(const struct rtentry *rt) * will be incremented. The caller has to rtfree it by itself. */ struct rtentry * -rtalloc1(const struct sockaddr *dst, int report) +rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok) { rtbl_t *rtbl; struct rtentry *rt; int s; +retry: s = splsoftnet(); rtbl = rt_gettable(dst->sa_family); if (rtbl == NULL) @@ -421,7 +566,34 @@ rtalloc1(const struct sockaddr *dst, int report) if (rt == NULL) goto miss; - rt->rt_refcnt++; + if (!ISSET(rt->rt_flags, RTF_UP)) + goto miss; + + if (ISSET(rt->rt_flags, RTF_UPDATING) && + /* XXX updater should be always able to acquire */ + curlwp != rt_update_global.lwp) { + bool need_lock = false; + if (!wait_ok || !rt_wait_ok()) + goto miss; + RT_UNLOCK(); + splx(s); + + /* XXX need more proper solution */ + if (RTCACHE_WLOCKED()) { + RTCACHE_UNLOCK(); + need_lock = true; + } + + /* We can wait until the update is complete */ + rt_update_wait(); + if (need_lock) + RTCACHE_WLOCK(); + RT_RLOCK(); /* retake rt_lock dropped above (order: rtcache_lock => rt_lock); the caller still unlocks it on return */ + goto retry; + } + + rt_ref(rt); + RT_REFCNT_TRACE(rt); splx(s); return rt; @@ -438,49 +610,211 @@ miss: return NULL; } -#if defined(DEBUG) && !defined(NET_MPSAFE) -/* - * Check the following constraint for each rtcache: - * if a rtcache holds a rtentry, the rtentry's refcnt is more than zero, - * i.e., the rtentry should be referenced at least by the rtcache.
- */ +struct rtentry * +rtalloc1(const struct sockaddr *dst, int report) +{ + struct rtentry *rt; + + RT_RLOCK(); + rt = rtalloc1_locked(dst, report, true); + RT_UNLOCK(); + + return rt; +} + static void -rtcache_check_rtrefcnt(int family) +rt_ref(struct rtentry *rt) { - struct domain *dom = pffinddomain(family); - struct route *ro; - if (dom == NULL) - return; - - LIST_FOREACH(ro, &dom->dom_rtcache, ro_rtcache_next) - KDASSERT(ro->_ro_rt == NULL || ro->_ro_rt->rt_refcnt > 0); + KASSERT(rt->rt_refcnt >= 0); + atomic_inc_uint(&rt->rt_refcnt); } -#endif void -rtfree(struct rtentry *rt) +rt_unref(struct rtentry *rt) { - struct ifaddr *ifa; KASSERT(rt != NULL); + KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); + + atomic_dec_uint(&rt->rt_refcnt); + if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { + mutex_enter(&rt_free_global.lock); + cv_broadcast(&rt->rt_cv); + mutex_exit(&rt_free_global.lock); + } +} + +static bool +rt_wait_ok(void) +{ + + KASSERT(!cpu_intr_p()); + return !cpu_softintr_p(); +} + +void +rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) +{ + mutex_enter(&rt_free_global.lock); + while (rt->rt_refcnt > cnt) { + log(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", + __func__, title, rt->rt_refcnt); + cv_wait(&rt->rt_cv, &rt_free_global.lock); + log(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", + __func__, title, rt->rt_refcnt); + } + mutex_exit(&rt_free_global.lock); +} + +void +rt_wait_psref(struct rtentry *rt) +{ + + psref_target_destroy(&rt->rt_psref, rt_psref_class); + psref_target_init(&rt->rt_psref, rt_psref_class); +} + +static void +_rt_free(struct rtentry *rt) +{ + struct ifaddr *ifa; + + /* + * Need to avoid a deadlock on rt_wait_refcnt of update + * and a conflict on psref_target_destroy of update. 
+ */ + rt_update_wait(); + + RT_REFCNT_TRACE(rt); + KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); + rt_wait_refcnt("free", rt, 0); + psref_target_destroy(&rt->rt_psref, rt_psref_class); + + rt_assert_inactive(rt); + rttrash--; + ifa = rt->rt_ifa; + rt->rt_ifa = NULL; + ifafree(ifa); + rt->rt_ifp = NULL; + cv_destroy(&rt->rt_cv); + rt_destroy(rt); + pool_put(&rtentry_pool, rt); +} + +static void +rt_free_work(struct work *wk, void *arg) +{ + int i; + struct rtentry *rt; + +restart: + mutex_enter(&rt_free_global.lock); + for (i = 0; i < __arraycount(rt_free_global.queue); i++) { /* bound by element count, not byte size */ + if (rt_free_global.queue[i] == NULL) + continue; + rt = rt_free_global.queue[i]; + rt_free_global.queue[i] = NULL; + mutex_exit(&rt_free_global.lock); + + atomic_dec_uint(&rt->rt_refcnt); /* drop the reference taken by rt_free() when queueing */ + _rt_free(rt); + goto restart; + } + mutex_exit(&rt_free_global.lock); +} + +void +rt_free(struct rtentry *rt) +{ + KASSERT(rt->rt_refcnt > 0); + if (!rt_wait_ok()) { /* cannot sleep here: defer the destruction to the workqueue */ + int i; + mutex_enter(&rt_free_global.lock); + for (i = 0; i < __arraycount(rt_free_global.queue); i++) { /* bound by element count, not byte size */ + if (rt_free_global.queue[i] == NULL) { + rt_free_global.queue[i] = rt; + break; + } + } + KASSERT(i < __arraycount(rt_free_global.queue)); /* a free slot must have been found */ + rt_ref(rt); + mutex_exit(&rt_free_global.lock); + workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); + } else { + atomic_dec_uint(&rt->rt_refcnt); + _rt_free(rt); + } +} + +static void +rt_update_wait(void) +{ + + mutex_enter(&rt_update_global.lock); + while (rt_update_global.ongoing) { + log(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); + cv_wait(&rt_update_global.cv, &rt_update_global.lock); + log(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); + } + mutex_exit(&rt_update_global.lock); +} + +int +rt_update_prepare(struct rtentry *rt) +{ + + log(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); + + RTCACHE_WLOCK(); + RT_WLOCK(); + /* If the entry is being destroyed, don't proceed the update. 
*/ + if (!ISSET(rt->rt_flags, RTF_UP)) { + RT_UNLOCK(); + RTCACHE_UNLOCK(); + return -1; + } + rt->rt_flags |= RTF_UPDATING; + RT_UNLOCK(); + RTCACHE_UNLOCK(); - rt->rt_refcnt--; -#if defined(DEBUG) && !defined(NET_MPSAFE) - if (rt_getkey(rt) != NULL) - rtcache_check_rtrefcnt(rt_getkey(rt)->sa_family); -#endif - if (rt->rt_refcnt == 0 && (rt->rt_flags & RTF_UP) == 0) { - rt_assert_inactive(rt); - rttrash--; - ifa = rt->rt_ifa; - rt->rt_ifa = NULL; - ifafree(ifa); - rt->rt_ifp = NULL; - rt_destroy(rt); - pool_put(&rtentry_pool, rt); + mutex_enter(&rt_update_global.lock); + while (rt_update_global.ongoing) { + log(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", + __func__, rt, curlwp); + cv_wait(&rt_update_global.cv, &rt_update_global.lock); + log(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", + __func__, rt, curlwp); } + rt_update_global.ongoing = true; + /* XXX need it to avoid rt_update_wait by updater itself. */ + rt_update_global.lwp = curlwp; + mutex_exit(&rt_update_global.lock); + + rt_wait_refcnt("update", rt, 1); + rt_wait_psref(rt); + + return 0; +} + +void +rt_update_finish(struct rtentry *rt) +{ + + RTCACHE_WLOCK(); + RT_WLOCK(); + rt->rt_flags &= ~RTF_UPDATING; + RT_UNLOCK(); + RTCACHE_UNLOCK(); + + mutex_enter(&rt_update_global.lock); + rt_update_global.ongoing = false; + rt_update_global.lwp = NULL; + cv_broadcast(&rt_update_global.cv); + mutex_exit(&rt_update_global.lock); + + log(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); } /* @@ -549,7 +883,7 @@ rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, */ create: if (rt != NULL) - rtfree(rt); + rt_unref(rt); flags |= RTF_GATEWAY | RTF_DYNAMIC; memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = dst; @@ -567,6 +901,10 @@ rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, * Smash the current notion of the gateway to * this destination. Should check about netmask!!! 
*/ + /* + * FIXME NOMPAFE: the rtentry is updated with the existence + * of refeferences of it. + */ error = rt_setgate(rt, gateway); if (error == 0) { rt->rt_flags |= RTF_MODIFIED; @@ -581,7 +919,7 @@ done: if (rtp != NULL && !error) *rtp = rt; else - rtfree(rt); + rt_unref(rt); } out: if (error) @@ -622,8 +960,6 @@ rtdeletemsg(struct rtentry *rt) rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); - if (error == 0) - rtfree(retrt); return error; } @@ -674,7 +1010,7 @@ ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, if (ifa != NULL) ifa_acquire(ifa, psref); pserialize_read_exit(s); - rtfree(rt); + rt_unref(rt); if (ifa == NULL) return NULL; } @@ -733,7 +1069,10 @@ rtrequest_newmsg(const int req, const struct sockaddr *dst, KASSERT(ret_nrt != NULL); rt_newmsg(req, ret_nrt); /* tell user process */ - rtfree(ret_nrt); + if (req == RTM_DELETE) + rt_free(ret_nrt); + else + rt_unref(ret_nrt); return 0; } @@ -824,8 +1163,11 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) int bound = 0; struct ifnet *ifp = NULL; bool need_to_release_ifa = true; + bool need_unlock = true; #define senderr(x) { error = x ; goto bad; } + RT_WLOCK(); + bound = curlwp_bind(); if ((rtbl = rt_gettable(dst->sa_family)) == NULL) senderr(ESRCH); @@ -856,16 +1198,21 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) ifa = NULL; } rttrash++; + if (ret_nrt) { + *ret_nrt = rt; + rt_ref(rt); + RT_REFCNT_TRACE(rt); + } + RT_UNLOCK(); + need_unlock = false; rt_timer_remove_all(rt); - if (ret_nrt) { - *ret_nrt = rt; - rt->rt_refcnt++; - } else if (rt->rt_refcnt <= 0) { - /* Adjust the refcount */ - rt->rt_refcnt++; - rtfree(rt); - } rtcache_clear_rtentry(dst->sa_family, rt); + if (ret_nrt == NULL) { + /* Adjust the refcount */ + rt_ref(rt); + RT_REFCNT_TRACE(rt); + rt_free(rt); + } break; case RTM_ADD: @@ -920,12 +1267,15 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) } else rt->rt_ifp = ifa->ifa_ifp; 
pserialize_read_exit(ss); + cv_init(&rt->rt_cv, "rtentry"); + psref_target_init(&rt->rt_psref, rt_psref_class); RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); rc = rt_addaddr(rtbl, rt, netmask); RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); if (rc != 0) { ifafree(ifa); /* for rt_set_ifa above */ + cv_destroy(&rt->rt_cv); rt_destroy(rt); pool_put(&rtentry_pool, rt); senderr(rc); @@ -941,8 +1291,11 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); if (ret_nrt) { *ret_nrt = rt; - rt->rt_refcnt++; + rt_ref(rt); + RT_REFCNT_TRACE(rt); } + RT_UNLOCK(); + need_unlock = false; rtflushall(dst->sa_family); break; case RTM_GET: @@ -955,7 +1308,8 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) senderr(ESRCH); if (ret_nrt != NULL) { *ret_nrt = rt; - rt->rt_refcnt++; + rt_ref(rt); + RT_REFCNT_TRACE(rt); } break; } @@ -964,6 +1318,8 @@ bad: ifa_release(ifa, &psref_ifa); if_put(ifp, &psref_ifp); curlwp_bindx(bound); + if (need_unlock) + RT_UNLOCK(); splx(s); return error; } @@ -989,7 +1345,13 @@ rt_setgate(struct rtentry *rt, const struct sockaddr *gate) RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); if (rt->rt_flags & RTF_GATEWAY) { - struct rtentry *gwrt = rtalloc1(gate, 1); + struct rtentry *gwrt; + + /* XXX we cannot call rtalloc1 if holding the rt lock */ + if (RT_LOCKED()) + gwrt = rtalloc1_locked(gate, 1, false); + else + gwrt = rtalloc1(gate, 1); /* * If we switched gateways, grab the MTU from the new * gateway route if the current MTU, if the current MTU is @@ -1005,7 +1367,7 @@ rt_setgate(struct rtentry *rt, const struct sockaddr *gate) rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) { rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu; } - rtfree(gwrt); + rt_unref(gwrt); } } KASSERT(rt->_rt_key != NULL); @@ -1076,11 +1438,11 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) } if ((rt = rtalloc1(dst, 0)) != NULL) { if (rt->rt_ifa != ifa) { - rtfree(rt); + 
rt_unref(rt); return (flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH; } - rtfree(rt); + rt_unref(rt); } } memset(&info, 0, sizeof(info)); @@ -1103,16 +1465,29 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) return error; rt = nrt; + RT_REFCNT_TRACE(rt); switch (cmd) { case RTM_DELETE: rt_newmsg(cmd, rt); + rt_free(rt); break; case RTM_LLINFO_UPD: if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); rt_newmsg(RTM_CHANGE, rt); + rt_unref(rt); break; case RTM_ADD: + /* + * FIXME NOMPAFE: the rtentry is updated with the existence + * of refeferences of it. + */ + /* + * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest + * called via rtrequest1. Can we just prevent the replacement + * somehow and remove the following code? And also doesn't + * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again? + */ if (rt->rt_ifa != ifa) { printf("rtinit: wrong ifa (%p) was (%p)\n", ifa, rt->rt_ifa); @@ -1126,9 +1501,10 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) ifa->ifa_rtrequest(RTM_ADD, rt, &info); } rt_newmsg(cmd, rt); + rt_unref(rt); + RT_REFCNT_TRACE(rt); break; } - rtfree(rt); return error; } @@ -1171,14 +1547,15 @@ rt_ifa_addlocal(struct ifaddr *ifa) #ifdef RT_DEBUG dump_rt(nrt); #endif - rtfree(nrt); + rt_unref(nrt); + RT_REFCNT_TRACE(nrt); } } else { e = 0; rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL); } if (rt != NULL) - rtfree(rt); + rt_unref(rt); return e; } @@ -1212,6 +1589,11 @@ rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) */ if (alt_ifa == NULL) { e = rtdeletemsg(rt); + if (e == 0) { + rt_unref(rt); + rt_free(rt); + rt = NULL; + } rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); } else { rt_replace_ifa(rt, alt_ifa); @@ -1220,7 +1602,7 @@ rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) } else rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); if (rt != NULL) - rtfree(rt); + rt_unref(rt); return e; } @@ -1252,6 +1634,10 @@ rt_timer_init(void) assert(rt_init_done == 0); + /* XXX should be in 
rt_init */ + rw_init(&rt_lock); + rw_init(&rtcache_lock); + LIST_INIT(&rttimer_queue_head); callout_init(&rt_timer_ch, CALLOUT_MPSAFE); error = workqueue_create(&rt_timer_wq, "rt_timer", @@ -1277,7 +1663,9 @@ rt_timer_queue_create(u_int timeout) rtq->rtq_timeout = timeout; TAILQ_INIT(&rtq->rtq_head); + RT_WLOCK(); LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); + RT_UNLOCK(); return rtq; } @@ -1294,12 +1682,17 @@ rt_timer_queue_remove_all(struct rttimer_queue *rtq) { struct rttimer *r; + RT_ASSERT_WLOCK(); + while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { LIST_REMOVE(r, rtt_link); TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); + rt_ref(r->rtt_rt); /* XXX */ + RT_REFCNT_TRACE(r->rtt_rt); + RT_UNLOCK(); (*r->rtt_func)(r->rtt_rt, r); - rtfree(r->rtt_rt); pool_put(&rttimer_pool, r); + RT_WLOCK(); if (rtq->rtq_count > 0) rtq->rtq_count--; else @@ -1312,9 +1705,10 @@ void rt_timer_queue_destroy(struct rttimer_queue *rtq) { + RT_WLOCK(); rt_timer_queue_remove_all(rtq); - LIST_REMOVE(rtq, rtq_link); + RT_UNLOCK(); /* * Caller is responsible for freeing the rttimer_queue structure. @@ -1332,6 +1726,7 @@ rt_timer_remove_all(struct rtentry *rt) { struct rttimer *r; + RT_WLOCK(); while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { LIST_REMOVE(r, rtt_link); TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); @@ -1340,8 +1735,8 @@ rt_timer_remove_all(struct rtentry *rt) else printf("rt_timer_remove_all: rtq_count reached 0\n"); pool_put(&rttimer_pool, r); - rt->rt_refcnt--; /* XXX */ } + RT_UNLOCK(); } int @@ -1352,6 +1747,7 @@ rt_timer_add(struct rtentry *rt, struct rttimer *r; KASSERT(func != NULL); + RT_WLOCK(); /* * If there's already a timer with this action, destroy it before * we add a new one. 
@@ -1367,16 +1763,16 @@ rt_timer_add(struct rtentry *rt, r->rtt_queue->rtq_count--; else printf("rt_timer_add: rtq_count reached 0\n"); - rtfree(r->rtt_rt); } else { r = pool_get(&rttimer_pool, PR_NOWAIT); - if (r == NULL) + if (r == NULL) { + RT_UNLOCK(); return ENOBUFS; + } } memset(r, 0, sizeof(*r)); - rt->rt_refcnt++; r->rtt_rt = rt; r->rtt_time = time_uptime; r->rtt_func = func; @@ -1385,6 +1781,8 @@ rt_timer_add(struct rtentry *rt, TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); r->rtt_queue->rtq_count++; + RT_UNLOCK(); + return 0; } @@ -1393,24 +1791,26 @@ rt_timer_work(struct work *wk, void *arg) { struct rttimer_queue *rtq; struct rttimer *r; - int s; - s = splsoftnet(); + RT_WLOCK(); LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && (r->rtt_time + rtq->rtq_timeout) < time_uptime) { LIST_REMOVE(r, rtt_link); TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); + rt_ref(r->rtt_rt); /* XXX */ + RT_REFCNT_TRACE(r->rtt_rt); + RT_UNLOCK(); (*r->rtt_func)(r->rtt_rt, r); - rtfree(r->rtt_rt); pool_put(&rttimer_pool, r); + RT_WLOCK(); if (rtq->rtq_count > 0) rtq->rtq_count--; else printf("rt_timer_timer: rtq_count reached 0\n"); } } - splx(s); + RT_UNLOCK(); callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); } @@ -1425,14 +1825,24 @@ rt_timer_timer(void *arg) static struct rtentry * _rtcache_init(struct route *ro, int flag) { + struct rtentry *rt; + rtcache_invariants(ro); KASSERT(ro->_ro_rt == NULL); + RTCACHE_ASSERT_WLOCK(); if (rtcache_getdst(ro) == NULL) return NULL; ro->ro_invalid = false; - if ((ro->_ro_rt = rtalloc1(rtcache_getdst(ro), flag)) != NULL) + rt = rtalloc1(rtcache_getdst(ro), flag); + if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) { + ro->_ro_rt = rt; + KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING)); + rtcache_ref(rt, ro); + rt_unref(rt); rtcache(ro); + } else if (rt != NULL) + rt_unref(rt); rtcache_invariants(ro); return ro->_ro_rt; @@ -1441,51 +1851,161 @@ _rtcache_init(struct route *ro, int flag) 
struct rtentry * rtcache_init(struct route *ro) { - return _rtcache_init(ro, 1); + struct rtentry *rt; + RTCACHE_WLOCK(); + rt = _rtcache_init(ro, 1); + RTCACHE_UNLOCK(); + return rt; } struct rtentry * rtcache_init_noclone(struct route *ro) { - return _rtcache_init(ro, 0); + struct rtentry *rt; + RTCACHE_WLOCK(); + rt = _rtcache_init(ro, 0); + RTCACHE_UNLOCK(); + return rt; } struct rtentry * rtcache_update(struct route *ro, int clone) { + struct rtentry *rt; + RTCACHE_WLOCK(); rtcache_clear(ro); - return _rtcache_init(ro, clone); + rt = _rtcache_init(ro, clone); + RTCACHE_UNLOCK(); + return rt; } void -rtcache_copy(struct route *new_ro, const struct route *old_ro) +rtcache_copy(struct route *new_ro, struct route *old_ro) { struct rtentry *rt; + int ret; KASSERT(new_ro != old_ro); rtcache_invariants(new_ro); rtcache_invariants(old_ro); - if ((rt = rtcache_validate(old_ro)) != NULL) - rt->rt_refcnt++; + rt = rtcache_validate(old_ro); - if (rtcache_getdst(old_ro) == NULL || - rtcache_setdst(new_ro, rtcache_getdst(old_ro)) != 0) - return; + if (rtcache_getdst(old_ro) == NULL) + goto out; + ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro)); + if (ret != 0) + goto out; + RTCACHE_WLOCK(); new_ro->ro_invalid = false; if ((new_ro->_ro_rt = rt) != NULL) rtcache(new_ro); rtcache_invariants(new_ro); + RTCACHE_UNLOCK(); +out: + rtcache_unref(rt, old_ro); + return; } static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist); +#ifdef RT_DEBUG +static void +rtcache_trace(const char *func, struct rtentry *rt, struct route *ro) +{ + char dst[64]; + + sockaddr_format(ro->ro_sa, dst, 64); + printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst, + cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref); +} +#define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro)) +#else +#define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0) +#endif + +static void +rtcache_ref(struct rtentry *rt, struct route *ro) +{ + + KASSERT(rt != NULL); + 
+#ifdef NET_MPSAFE + RTCACHE_PSREF_TRACE(rt, ro); + ro->ro_bound = curlwp_bind(); + psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class); +#endif +} + +void +rtcache_unref(struct rtentry *rt, struct route *ro) +{ + + if (rt == NULL) + return; + +#ifdef NET_MPSAFE + psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class); + curlwp_bindx(ro->ro_bound); + RTCACHE_PSREF_TRACE(rt, ro); +#endif +} + +static struct rtentry * +rtcache_validate_locked(struct route *ro) +{ + struct rtentry *rt = NULL; + +retry: + rt = ro->_ro_rt; + rtcache_invariants(ro); + + if (ro->ro_invalid) { + rt = NULL; + goto out; + } + + RT_RLOCK(); + if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) { + if (ISSET(rt->rt_flags, RTF_UPDATING)) { + if (rt_wait_ok()) { + RT_UNLOCK(); + RTCACHE_UNLOCK(); + /* We can wait until the update is complete */ + rt_update_wait(); + RTCACHE_RLOCK(); + goto retry; + } else { + rt = NULL; + } + } else + rtcache_ref(rt, ro); + } else + rt = NULL; + RT_UNLOCK(); +out: + return rt; +} + +struct rtentry * +rtcache_validate(struct route *ro) +{ + struct rtentry *rt; + + RTCACHE_RLOCK(); + rt = rtcache_validate_locked(ro); + RTCACHE_UNLOCK(); + return rt; +} + static void rtcache_invalidate(struct dom_rtlist *rtlist) { struct route *ro; + RTCACHE_ASSERT_WLOCK(); + while ((ro = LIST_FIRST(rtlist)) != NULL) { rtcache_invariants(ro); KASSERT(ro->_ro_rt != NULL); @@ -1505,66 +2025,84 @@ rtcache_clear_rtentry(int family, struct rtentry *rt) if ((dom = pffinddomain(family)) == NULL) return; + RTCACHE_WLOCK(); LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) { if (ro->_ro_rt == rt) rtcache_clear(ro); } + RTCACHE_UNLOCK(); } static void rtcache_clear(struct route *ro) { + + RTCACHE_ASSERT_WLOCK(); + rtcache_invariants(ro); if (ro->_ro_rt == NULL) return; LIST_REMOVE(ro, ro_rtcache_next); - rtfree(ro->_ro_rt); ro->_ro_rt = NULL; ro->ro_invalid = false; rtcache_invariants(ro); } struct rtentry * -rtcache_lookup2(struct route *ro, const 
struct sockaddr *dst, int clone, - int *hitp) +rtcache_lookup2(struct route *ro, const struct sockaddr *dst, + int clone, int *hitp) { const struct sockaddr *odst; struct rtentry *rt = NULL; + RTCACHE_RLOCK(); odst = rtcache_getdst(ro); - if (odst == NULL) + if (odst == NULL) { + RTCACHE_UNLOCK(); + RTCACHE_WLOCK(); goto miss; + } if (sockaddr_cmp(odst, dst) != 0) { - rtcache_free(ro); + RTCACHE_UNLOCK(); + RTCACHE_WLOCK(); + rtcache_free_locked(ro); goto miss; } - rt = rtcache_validate(ro); + rt = rtcache_validate_locked(ro); if (rt == NULL) { + RTCACHE_UNLOCK(); + RTCACHE_WLOCK(); rtcache_clear(ro); goto miss; } - *hitp = 1; rtcache_invariants(ro); + RTCACHE_UNLOCK(); + if (hitp != NULL) + *hitp = 1; return rt; miss: - *hitp = 0; - if (rtcache_setdst(ro, dst) == 0) + if (hitp != NULL) + *hitp = 0; + if (rtcache_setdst_locked(ro, dst) == 0) rt = _rtcache_init(ro, clone); rtcache_invariants(ro); + RTCACHE_UNLOCK(); return rt; } -void -rtcache_free(struct route *ro) +static void +rtcache_free_locked(struct route *ro) { + + RTCACHE_ASSERT_WLOCK(); rtcache_clear(ro); if (ro->ro_sa != NULL) { sockaddr_free(ro->ro_sa); @@ -1573,11 +2111,22 @@ rtcache_free(struct route *ro) rtcache_invariants(ro); } -int -rtcache_setdst(struct route *ro, const struct sockaddr *sa) +void +rtcache_free(struct route *ro) +{ + + RTCACHE_WLOCK(); + rtcache_free_locked(ro); + RTCACHE_UNLOCK(); +} + +static int +rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa) { KASSERT(sa != NULL); + RTCACHE_ASSERT_WLOCK(); + rtcache_invariants(ro); if (ro->ro_sa != NULL) { if (ro->ro_sa->sa_family == sa->sa_family) { @@ -1587,7 +2136,7 @@ rtcache_setdst(struct route *ro, const struct sockaddr *sa) return 0; } /* free ro_sa, wrong family */ - rtcache_free(ro); + rtcache_free_locked(ro); } KASSERT(ro->_ro_rt == NULL); @@ -1600,6 +2149,18 @@ rtcache_setdst(struct route *ro, const struct sockaddr *sa) return 0; } +int +rtcache_setdst(struct route *ro, const struct sockaddr *sa) +{ + int error; + 
+ RTCACHE_WLOCK(); + error = rtcache_setdst_locked(ro, sa); + RTCACHE_UNLOCK(); + + return error; +} + const struct sockaddr * rt_settag(struct rtentry *rt, const struct sockaddr *tag) { @@ -1645,14 +2206,17 @@ rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *) int error; struct rtentry *rt, *retrt = NULL; + RT_RLOCK(); s = splsoftnet(); rt = rtbl_search_matched_entry(family, f, v); if (rt == NULL) { splx(s); + RT_UNLOCK(); return; } rt->rt_refcnt++; splx(s); + RT_UNLOCK(); error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, &retrt); @@ -1660,11 +2224,11 @@ rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *) KASSERT(retrt == rt); KASSERT((retrt->rt_flags & RTF_UP) == 0); retrt->rt_ifp = NULL; - rtfree(rt); - rtfree(retrt); + rt_unref(rt); + rt_free(retrt); } else if (error == ESRCH) { /* Someone deleted the entry already. */ - rtfree(rt); + rt_unref(rt); } else { log(LOG_ERR, "%s: unable to delete rtentry @ %p, " "error = %d\n", rt->rt_ifp->if_xname, rt, error); @@ -1673,6 +2237,18 @@ rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *) } } +int +rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) +{ + int error; + + RT_RLOCK(); + error = rtbl_walktree(family, f, v); + RT_UNLOCK(); + + return error; +} + #ifdef DDB #include diff --git a/sys/net/route.h b/sys/net/route.h index b657fbb..d041b75 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -38,6 +38,12 @@ #include #include #include +#ifdef _KERNEL +#include +#include +#include +#include +#endif #if !(defined(_KERNEL) || defined(_STANDALONE)) #include @@ -60,6 +66,10 @@ struct route { struct sockaddr *ro_sa; LIST_ENTRY(route) ro_rtcache_next; bool ro_invalid; +#ifdef _KERNEL + struct psref ro_psref; + int ro_bound; +#endif }; /* @@ -115,6 +125,10 @@ struct rtentry { struct rtentry *rt_parent; /* parent of cloned route */ struct sockaddr *_rt_key; struct sockaddr 
*rt_tag; /* route tagging info */ +#ifdef _KERNEL + kcondvar_t rt_cv; + struct psref_target rt_psref; +#endif }; static inline const struct sockaddr * @@ -159,6 +173,7 @@ struct ortentry { #define RTF_ANNOUNCE 0x20000 /* announce new ARP or NDP entry */ #define RTF_LOCAL 0x40000 /* route represents a local address */ #define RTF_BROADCAST 0x80000 /* route represents a bcast address */ +#define RTF_UPDATING 0x100000 /* route is updating */ /* * Routing statistics. @@ -376,10 +391,15 @@ struct rttimer_queue * rt_timer_queue_create(u_int); void rt_timer_queue_destroy(struct rttimer_queue *); +void rt_free(struct rtentry *); +void rt_unref(struct rtentry *); + +int rt_update_prepare(struct rtentry *); +void rt_update_finish(struct rtentry *); + void rt_newmsg(const int, const struct rtentry *); struct rtentry * rtalloc1(const struct sockaddr *, int); -void rtfree(struct rtentry *); int rtinit(struct ifaddr *, int, int); void rtredirect(const struct sockaddr *, const struct sockaddr *, const struct sockaddr *, int, const struct sockaddr *, @@ -410,6 +430,7 @@ struct sockaddr * int rt_check_reject_route(const struct rtentry *, const struct ifnet *); void rt_delete_matched_entries(sa_family_t, int (*)(struct rtentry *, void *), void *); +int rt_walktree(sa_family_t, int (*)(struct rtentry *, void *), void *); static inline void rt_assert_referenced(const struct rtentry *rt) @@ -418,7 +439,7 @@ rt_assert_referenced(const struct rtentry *rt) KASSERT(rt->rt_refcnt > 0); } -void rtcache_copy(struct route *, const struct route *); +void rtcache_copy(struct route *, struct route *); void rtcache_free(struct route *); struct rtentry * rtcache_init(struct route *); @@ -436,7 +457,6 @@ rtcache_invariants(const struct route *ro) { KASSERT(ro->ro_sa != NULL || ro->_ro_rt == NULL); KASSERT(!ro->ro_invalid || ro->_ro_rt != NULL); - KASSERT(ro->_ro_rt == NULL || ro->_ro_rt->rt_refcnt > 0); } static inline struct rtentry * @@ -456,29 +476,15 @@ rtcache_lookup(struct route *ro, const 
struct sockaddr *dst) static inline const struct sockaddr * rtcache_getdst(const struct route *ro) { + rtcache_invariants(ro); return ro->ro_sa; } -/* If the cache is not empty, and the cached route is still present - * in the routing table, return the cached route. Otherwise, return - * NULL. - */ -static inline struct rtentry * -rtcache_validate(const struct route *ro) -{ - struct rtentry *rt = ro->_ro_rt; +struct rtentry * + rtcache_validate(struct route *); - rtcache_invariants(ro); - - if (ro->ro_invalid) - return NULL; - - if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) - return rt; - return NULL; - -} +void rtcache_unref(struct rtentry *, struct route *); /* rtsock */ void rt_ieee80211msg(struct ifnet *, int, void *, size_t); @@ -505,7 +511,7 @@ struct rtentry * struct rtentry * rt_matchaddr(rtbl_t *, const struct sockaddr *); int rt_refines(const struct sockaddr *, const struct sockaddr *); -int rt_walktree(sa_family_t, int (*)(struct rtentry *, void *), void *); +int rtbl_walktree(sa_family_t, int (*)(struct rtentry *, void *), void *); struct rtentry * rtbl_search_matched_entry(sa_family_t, int (*)(struct rtentry *, void *), void *); diff --git a/sys/net/rtbl.c b/sys/net/rtbl.c index 63924a6..33e5db7 100644 --- a/sys/net/rtbl.c +++ b/sys/net/rtbl.c @@ -190,7 +190,7 @@ rt_walktree_visitor(struct radix_node *rn, void *v) } int -rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) +rtbl_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) { rtbl_t *t = rt_tables[family]; struct rtwalk rw; diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index f263f0a..9a4d574d4 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -445,7 +445,7 @@ route_get_sdl_index(struct rt_addrinfo *info, int *sdl_index) * due to changing to ifplo0. 
*/ *sdl_index = satosdl(nrt->rt_gateway)->sdl_index; - rtfree(nrt); + rt_unref(nrt); return 0; } @@ -636,7 +636,8 @@ route_output_change(struct rtentry *rt, struct rt_addrinfo *info, } if (ifa) { struct ifaddr *oifa = rt->rt_ifa; - if (oifa != ifa) { + if (oifa != ifa && !ifa_is_destroying(ifa) && + new_ifp != NULL && !if_is_deactivated(new_ifp)) { if (oifa && oifa->ifa_rtrequest) oifa->ifa_rtrequest(RTM_DELETE, rt, info); rt_replace_ifa(rt, ifa); @@ -646,7 +647,8 @@ route_output_change(struct rtentry *rt, struct rt_addrinfo *info, ifa_release(ifa, &psref_ifa); } ifa_release(new_ifa, &psref_new_ifa); - if (new_ifp && rt->rt_ifp != new_ifp) + if (new_ifp && rt->rt_ifp != new_ifp + && !if_is_deactivated(new_ifp)) rt->rt_ifp = new_ifp; rt_setmetrics(rtm->rtm_inits, rtm, rt); if (rt->rt_flags != info->rti_flags) @@ -674,6 +676,7 @@ COMPATNAME(route_output)(struct mbuf *m, struct socket *so) sa_family_t family; struct sockaddr_dl sdl; int bound = curlwp_bind(); + bool do_rt_free = false; #define senderr(e) do { error = e; goto flush;} while (/*CONSTCOND*/ 0) if (m == NULL || ((m->m_len < sizeof(int32_t)) && @@ -784,7 +787,7 @@ COMPATNAME(route_output)(struct mbuf *m, struct socket *so) error = rtrequest1(rtm->rtm_type, &info, &saved_nrt); if (error == 0) { rt_setmetrics(rtm->rtm_inits, rtm, saved_nrt); - rtfree(saved_nrt); + rt_unref(saved_nrt); } break; @@ -804,6 +807,7 @@ COMPATNAME(route_output)(struct mbuf *m, struct socket *so) break; rt = saved_nrt; + do_rt_free = true; info.rti_info[RTAX_DST] = rt_getkey(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); @@ -883,7 +887,11 @@ COMPATNAME(route_output)(struct mbuf *m, struct socket *so) break; case RTM_CHANGE: - error = route_output_change(rt, &info, rtm); + error = rt_update_prepare(rt); + if (error == 0) { + error = route_output_change(rt, &info, rtm); + rt_update_finish(rt); + } if (error != 0) goto flush; /*FALLTHROUGH*/ @@ -914,8 +922,12 @@ flush: */ if (old_rtm != NULL) Free(old_rtm); -
if (rt) - rtfree(rt); + if (rt) { + if (do_rt_free) + rt_free(rt); + else + rt_unref(rt); + } { struct rawcb *rp = NULL; /* diff --git a/sys/netatalk/ddp_input.c b/sys/netatalk/ddp_input.c index 32926d5..44bfb1b 100644 --- a/sys/netatalk/ddp_input.c +++ b/sys/netatalk/ddp_input.c @@ -279,9 +279,11 @@ ddp_input(struct mbuf *m, struct ifnet *ifp, struct elaphdr *elh, int phase) } #endif if (ddp_firewall && (rt == NULL || rt->rt_ifp != ifp)) { + rtcache_unref(rt, &forwro); m_freem(m); return; } + rtcache_unref(rt, &forwro); ddpe.deh_hops++; ddpe.deh_bytes = htonl(ddpe.deh_bytes); memcpy((void *) deh, (void *) & ddpe, sizeof(u_short));/*XXX*/ diff --git a/sys/netatalk/ddp_output.c b/sys/netatalk/ddp_output.c index d081780..fa4f4c1 100644 --- a/sys/netatalk/ddp_output.c +++ b/sys/netatalk/ddp_output.c @@ -122,6 +122,7 @@ ddp_route(struct mbuf *m, struct route *ro) struct ifnet *ifp = NULL; uint16_t net; uint8_t loopback = 0; + int error; if ((rt = rtcache_validate(ro)) != NULL && (ifp = rt->rt_ifp) != NULL) { const struct sockaddr_at *dst = satocsat(rtcache_getdst(ro)); @@ -153,7 +154,8 @@ ddp_route(struct mbuf *m, struct route *ro) printf("%s: no address found\n", __func__); #endif m_freem(m); - return EINVAL; + error = EINVAL; + goto out; } /* * There are several places in the kernel where data is added to @@ -163,8 +165,10 @@ ddp_route(struct mbuf *m, struct route *ro) */ if (!(aa->aa_flags & AFA_PHASE2)) { M_PREPEND(m, SZ_ELAPHDR, M_DONTWAIT); - if (m == NULL) - return ENOBUFS; + if (m == NULL) { + error = ENOBUFS; + goto out; + } elh = mtod(m, struct elaphdr *); elh->el_snode = satosat(&aa->aa_addr)->sat_addr.s_node; @@ -203,5 +207,9 @@ ddp_route(struct mbuf *m, struct route *ro) #endif looutput(lo0ifp, copym, rtcache_getdst(ro), NULL); } - return if_output_lock(ifp, ifp, m, (struct sockaddr *)&gate, NULL); + + error = if_output_lock(ifp, ifp, m, (struct sockaddr *)&gate, NULL); +out: + rtcache_unref(rt, ro); + return error; } diff --git a/sys/netatalk/ddp_usrreq.c 
b/sys/netatalk/ddp_usrreq.c index c0933dd..562642b 100644 --- a/sys/netatalk/ddp_usrreq.c +++ b/sys/netatalk/ddp_usrreq.c @@ -226,6 +226,7 @@ at_pcbconnect(struct ddpcb *ddp, struct sockaddr_at *sat) if (aa == NULL || (cdst->sat_addr.s_net != (hintnet ? hintnet : sat->sat_addr.s_net) || cdst->sat_addr.s_node != sat->sat_addr.s_node)) { + rtcache_unref(rt, ro); rtcache_free(ro); rt = NULL; } @@ -254,6 +255,7 @@ at_pcbconnect(struct ddpcb *ddp, struct sockaddr_at *sat) } } else aa = NULL; + rtcache_unref(rt, ro); if (aa == NULL) return ENETUNREACH; ddp->ddp_fsat = *sat; diff --git a/sys/netinet/if_arp.c b/sys/netinet/if_arp.c index 413c442..8e06cf1 100644 --- a/sys/netinet/if_arp.c +++ b/sys/netinet/if_arp.c @@ -894,7 +894,7 @@ notfound: arprequest(ifp, &satocsin(_rt->rt_ifa->ifa_addr)->sin_addr, &satocsin(dst)->sin_addr, enaddr); - rtfree(_rt); + rt_unref(_rt); } return error; } diff --git a/sys/netinet/if_atm.c b/sys/netinet/if_atm.c index 91d18a7..43ba030 100644 --- a/sys/netinet/if_atm.c +++ b/sys/netinet/if_atm.c @@ -219,7 +219,7 @@ atmresolve(const struct rtentry *rt0, struct mbuf *m, const struct sockaddr *dst if ((rt->rt_flags & RTF_GATEWAY) != 0 || /* XXX: are we using LLINFO? */ rt->rt_gateway->sa_family != AF_LINK) { - rtfree(rt); + rt_unref(rt); goto bad; } } @@ -241,12 +241,12 @@ atmresolve(const struct rtentry *rt0, struct mbuf *m, const struct sockaddr *dst if (sdl->sdl_family == AF_LINK && sdl->sdl_alen == sizeof(*desten)) { memcpy(desten, CLLADDR(sdl), sdl->sdl_alen); if (rt != NULL) - rtfree(rt); + rt_unref(rt); return (1); /* ok, go for it! 
*/ } if (rt != NULL) - rtfree(rt); + rt_unref(rt); /* * we got an entry, but it doesn't have valid link address diff --git a/sys/netinet/in.c b/sys/netinet/in.c index aa66f32..27d7b52 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -848,6 +848,7 @@ in_purgeaddr(struct ifaddr *ifa) KASSERT(!ifa_held(ifa)); + ifa->ifa_flags |= IFA_DESTROYING; in_scrubaddr(ia); mutex_enter(&in_ifaddr_lock); @@ -1772,7 +1773,7 @@ in_selectsrc(struct sockaddr_in *sin, struct route *ro, } if (ia == NULL) { *errorp = EADDRNOTAVAIL; - return NULL; + goto out; } } /* @@ -1802,7 +1803,8 @@ in_selectsrc(struct sockaddr_in *sin, struct route *ro, if (ia != NULL) ia4_release(ia, psref); *errorp = EADDRNOTAVAIL; - return NULL; + ia = NULL; + goto out; } pserialize_read_exit(s); } @@ -1812,7 +1814,7 @@ in_selectsrc(struct sockaddr_in *sin, struct route *ro, sintosa(sin))); if (ia == NULL) { *errorp = EADDRNOTAVAIL; - return NULL; + goto out; } /* FIXME NOMPSAFE */ ia4_acquire(ia, psref); @@ -1821,6 +1823,8 @@ in_selectsrc(struct sockaddr_in *sin, struct route *ro, else printf("%s: missing ifa_getifa\n", __func__); #endif +out: + rtcache_unref(rt, ro); return ia; } @@ -1987,7 +1991,7 @@ in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr error = 0; error: - rtfree(rt); + rt_unref(rt); return error; } diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 36102bd..9f52b86 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -177,10 +177,12 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) /* If the route constitutes infinite encapsulation, punt. 
*/ if (rt->rt_ifp == ifp) { + rtcache_unref(rt, &sc->gif_ro); rtcache_free(&sc->gif_ro); m_freem(m); return ENETUNREACH; /*XXX*/ } + rtcache_unref(rt, &sc->gif_ro); error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL); return (error); @@ -329,10 +331,10 @@ gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp) (u_int32_t)ntohl(u.sin.sin_addr.s_addr)); #endif if (rt != NULL) - rtfree(rt); + rt_unref(rt); return 0; } - rtfree(rt); + rt_unref(rt); } return 32 * 2; diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 72c93d5..20e3cda 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -777,8 +777,11 @@ in_pcbpurgeif(struct inpcbtable *table, struct ifnet *ifp) if (inp->inp_af != AF_INET) continue; if ((rt = rtcache_validate(&inp->inp_route)) != NULL && - rt->rt_ifp == ifp) + rt->rt_ifp == ifp) { + rtcache_unref(rt, &inp->inp_route); in_rtchange(inp, 0); + } else + rtcache_unref(rt, &inp->inp_route); } } @@ -805,10 +808,17 @@ in_losing(struct inpcb *inp) info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); - if (rt->rt_flags & RTF_DYNAMIC) - (void) rtrequest(RTM_DELETE, rt_getkey(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, - NULL); + if (rt->rt_flags & RTF_DYNAMIC) { + int error; + struct rtentry *nrt; + + error = rtrequest(RTM_DELETE, rt_getkey(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, &nrt); + rtcache_unref(rt, &inp->inp_route); + if (error == 0) + rt_free(nrt); + } else + rtcache_unref(rt, &inp->inp_route); /* * A new route can be allocated * the next time output is attempted. 
@@ -1088,3 +1098,10 @@ in_pcbrtentry(struct inpcb *inp) sockaddr_in_init(&u.dst4, &inp->inp_faddr, 0); return rtcache_lookup(ro, &u.dst); } + +void +in_pcbrtentry_unref(struct rtentry *rt, struct inpcb *inp) +{ + + rtcache_unref(rt, &inp->inp_route); +} diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 241e188..6eecf1c 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -160,6 +160,7 @@ void in_setpeeraddr(struct inpcb *, struct sockaddr_in *); void in_setsockaddr(struct inpcb *, struct sockaddr_in *); struct rtentry * in_pcbrtentry(struct inpcb *); +void in_pcbrtentry_unref(struct rtentry *, struct inpcb *); #endif #endif /* !_NETINET_IN_PCB_H_ */ diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 864b089..30976f5 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -397,7 +397,7 @@ carp_setroute(struct carp_softc *sc, int cmd) hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && (rt->rt_flags & RTF_CONNECTED)); if (rt != NULL) { - rtfree(rt); + rt_unref(rt); rt = NULL; } @@ -441,7 +441,7 @@ carp_setroute(struct carp_softc *sc, int cmd) break; } if (rt != NULL) { - rtfree(rt); + rt_unref(rt); rt = NULL; } break; diff --git a/sys/netinet/ip_etherip.c b/sys/netinet/ip_etherip.c index 0d8180b..43cebb0 100644 --- a/sys/netinet/ip_etherip.c +++ b/sys/netinet/ip_etherip.c @@ -167,10 +167,12 @@ ip_etherip_output(struct ifnet *ifp, struct mbuf *m) /* if it constitutes infinite encapsulation, punt. 
*/ if (rt->rt_ifp == ifp) { + rtcache_unref(rt, &sc->sc_ro); rtcache_free(&sc->sc_ro); m_freem(m); return ENETUNREACH; /*XXX*/ } + rtcache_unref(rt, &sc->sc_ro); error = ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); diff --git a/sys/netinet/ip_flow.c b/sys/netinet/ip_flow.c index 3f81631..ed9cecb 100644 --- a/sys/netinet/ip_flow.c +++ b/sys/netinet/ip_flow.c @@ -208,7 +208,7 @@ ipflow_fastforward(struct mbuf *m) struct ip *ip; struct ip ip_store; struct ipflow *ipf; - struct rtentry *rt; + struct rtentry *rt = NULL; const struct sockaddr *dst; int error; int iplen; @@ -258,7 +258,7 @@ ipflow_fastforward(struct mbuf *m) M_CSUM_IPv4_BAD)) { case M_CSUM_IPv4|M_CSUM_IPv4_BAD: m_put_rcvif(ifp, &s); - goto out; + goto out_unref; case M_CSUM_IPv4: /* Checksum was okay. */ @@ -268,7 +268,7 @@ ipflow_fastforward(struct mbuf *m) /* Must compute it ourselves. */ if (in_cksum(m, sizeof(struct ip)) != 0) { m_put_rcvif(ifp, &s); - goto out; + goto out_unref; } break; } @@ -277,16 +277,16 @@ ipflow_fastforward(struct mbuf *m) /* * Route and interface still up? */ - if ((rt = rtcache_validate(&ipf->ipf_ro)) == NULL || - (rt->rt_ifp->if_flags & IFF_UP) == 0 || + rt = rtcache_validate(&ipf->ipf_ro); + if (rt == NULL || (rt->rt_ifp->if_flags & IFF_UP) == 0 || (rt->rt_flags & (RTF_BLACKHOLE | RTF_BROADCAST)) != 0) - goto out; + goto out_unref; /* * Packet size OK? TTL? */ if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) - goto out; + goto out_unref; /* * Clear any in-bound checksum flags for this packet. 
@@ -359,7 +359,9 @@ ipflow_fastforward(struct mbuf *m) ipf->ipf_errors++; } ret = 1; - out: +out_unref: + rtcache_unref(rt, &ipf->ipf_ro); +out: mutex_exit(&ipflow_lock); return ret; } @@ -370,8 +372,11 @@ ipflow_addstats(struct ipflow *ipf) struct rtentry *rt; uint64_t *ips; - if ((rt = rtcache_validate(&ipf->ipf_ro)) != NULL) + rt = rtcache_validate(&ipf->ipf_ro); + if (rt != NULL) { rt->rt_use += ipf->ipf_uses; + rtcache_unref(rt, &ipf->ipf_ro); + } ips = IP_STAT_GETREF(); ips[IP_STAT_CANTFORWARD] += ipf->ipf_errors + ipf->ipf_dropped; @@ -431,12 +436,15 @@ ipflow_reap(bool just_one) struct ipflow *maybe_ipf = TAILQ_LAST(&ipflowlist, ipflowhead); TAILQ_FOREACH(ipf, &ipflowlist, ipf_list) { + struct rtentry *rt; /* * If this no longer points to a valid route * reclaim it. */ - if (rtcache_validate(&ipf->ipf_ro) == NULL) + rt = rtcache_validate(&ipf->ipf_ro); + if (rt == NULL) goto done; + rtcache_unref(rt, &ipf->ipf_ro); /* * choose the one that's been least recently * used or has had the least uses in the @@ -488,6 +496,7 @@ ipflow_slowtimo_work(struct work *wk, void *arg) } else { ipf->ipf_last_uses = ipf->ipf_uses; rt->rt_use += ipf->ipf_uses; + rtcache_unref(rt, &ipf->ipf_ro); ips = IP_STAT_GETREF(); ips[IP_STAT_TOTAL] += ipf->ipf_uses; ips[IP_STAT_FORWARD] += ipf->ipf_uses; @@ -515,7 +524,7 @@ ipflow_slowtimo(void) } void -ipflow_create(const struct route *ro, struct mbuf *m) +ipflow_create(struct route *ro, struct mbuf *m) { const struct ip *const ip = mtod(m, const struct ip *); struct ipflow *ipf; diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index d92fa8e..fa4a108 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -652,7 +652,7 @@ reflect: } } if (rt != NULL) - rtfree(rt); + rt_unref(rt); pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc)); #if defined(IPSEC) @@ -1148,16 +1148,19 @@ icmp_mtudisc(struct icmp *icp, struct in_addr faddr) error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL, RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, 
&nrt); if (error) { - rtfree(rt); + rt_unref(rt); return; } nrt->rt_rmx = rt->rt_rmx; - rtfree(rt); + rt_unref(rt); rt = nrt; } + + if (ip_mtudisc_timeout_q == NULL) + ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q); if (error) { - rtfree(rt); + rt_unref(rt); return; } @@ -1205,8 +1208,8 @@ icmp_mtudisc(struct icmp *icp, struct in_addr faddr) } } - if (rt) - rtfree(rt); + if (rt != NULL) + rt_unref(rt); /* * Notify protocols that the MTU for this destination diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index bc5619f..05f2c26 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -287,7 +287,7 @@ static void ipintr(void *); static void ip_input(struct mbuf *); static void ip_forward(struct mbuf *, int, struct ifnet *); static bool ip_dooptions(struct mbuf *); -static struct in_ifaddr *ip_rtaddr(struct in_addr); +static struct in_ifaddr *ip_rtaddr(struct in_addr, struct psref *); static void sysctl_net_inet_ip_setup(struct sysctllog **); static struct in_ifaddr *ip_match_our_address(struct ifnet *, struct ip *, @@ -335,7 +335,6 @@ ip_init(void) ip_ids = ip_id_init(); ip_id = time_uptime & 0xfffff; - ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); #ifdef GATEWAY ipflow_init(); #endif @@ -943,7 +942,8 @@ ip_dooptions(struct mbuf *m) * address is on directly accessible net. 
*/ case IPOPT_LSRR: - case IPOPT_SSRR: + case IPOPT_SSRR: { + struct psref psref; if (ip_allowsrcrt == 0) { type = ICMP_UNREACH; code = ICMP_UNREACH_NET_PROHIB; @@ -989,34 +989,35 @@ ip_dooptions(struct mbuf *m) */ memcpy((void *)&ipaddr.sin_addr, (void *)(cp + off), sizeof(ipaddr.sin_addr)); - s = pserialize_read_enter(); if (opt == IPOPT_SSRR) { - ifa = ifa_ifwithladdr(sintosa(&ipaddr)); + ifa = ifa_ifwithladdr_psref(sintosa(&ipaddr), + &psref); if (ifa != NULL) ia = ifatoia(ifa); else ia = NULL; } else { - ia = ip_rtaddr(ipaddr.sin_addr); + ia = ip_rtaddr(ipaddr.sin_addr, &psref); } if (ia == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; - pserialize_read_exit(s); goto bad; } ip->ip_dst = ipaddr.sin_addr; bcopy((void *)&ia->ia_addr.sin_addr, (void *)(cp + off), sizeof(struct in_addr)); - pserialize_read_exit(s); + ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); /* * Let ip_intr's mcast routing check handle mcast pkts */ forward = !IN_MULTICAST(ip->ip_dst.s_addr); break; + } - case IPOPT_RR: + case IPOPT_RR: { + struct psref psref; if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; @@ -1037,12 +1038,10 @@ ip_dooptions(struct mbuf *m) * locate outgoing interface; if we're the destination, * use the incoming interface (should be same). 
*/ - s = pserialize_read_enter(); - ifa = ifa_ifwithaddr(sintosa(&ipaddr)); + ifa = ifa_ifwithaddr_psref(sintosa(&ipaddr), &psref); if (ifa == NULL) { - ia = ip_rtaddr(ipaddr.sin_addr); + ia = ip_rtaddr(ipaddr.sin_addr, &psref); if (ia == NULL) { - pserialize_read_exit(s); type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; goto bad; @@ -1052,9 +1051,10 @@ ip_dooptions(struct mbuf *m) } bcopy((void *)&ia->ia_addr.sin_addr, (void *)(cp + off), sizeof(struct in_addr)); - pserialize_read_exit(s); + ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); break; + } case IPOPT_TS: code = cp - (u_char *)ip; @@ -1172,7 +1172,7 @@ bad: * return internet address info of interface to be used to get there. */ static struct in_ifaddr * -ip_rtaddr(struct in_addr dst) +ip_rtaddr(struct in_addr dst, struct psref *psref) { struct rtentry *rt; union { @@ -1183,13 +1183,16 @@ ip_rtaddr(struct in_addr dst) sockaddr_in_init(&u.dst4, &dst, 0); - SOFTNET_LOCK(); ro = percpu_getref(ipforward_rt_percpu); rt = rtcache_lookup(ro, &u.dst); - percpu_putref(ipforward_rt_percpu); - SOFTNET_UNLOCK(); - if (rt == NULL) + if (rt == NULL) { + percpu_putref(ipforward_rt_percpu); return NULL; + } + + ia4_acquire(ifatoia(rt->rt_ifa), psref); + rtcache_unref(rt, ro); + percpu_putref(ipforward_rt_percpu); return ifatoia(rt->rt_ifa); } @@ -1349,7 +1352,8 @@ ip_forward(struct mbuf *m, int srcrt, struct ifnet *rcvif) sockaddr_in_init(&u.dst4, &ip->ip_dst, 0); ro = percpu_getref(ipforward_rt_percpu); - if ((rt = rtcache_lookup(ro, &u.dst)) == NULL) { + rt = rtcache_lookup(ro, &u.dst); + if (rt == NULL) { percpu_putref(ipforward_rt_percpu); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0); return; @@ -1393,6 +1397,7 @@ ip_forward(struct mbuf *m, int srcrt, struct ifnet *rcvif) code = ICMP_REDIRECT_HOST; } } + rtcache_unref(rt, ro); error = ip_output(m, NULL, ro, (IP_FORWARDING | (ip_directedbcast ? 
IP_ALLOWBROADCAST : 0)), @@ -1450,8 +1455,10 @@ error: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; - if ((rt = rtcache_validate(ro)) != NULL) + if ((rt = rtcache_validate(ro)) != NULL) { destmtu = rt->rt_ifp->if_mtu; + rtcache_unref(rt, ro); + } #ifdef IPSEC if (ipsec_used) (void)ipsec4_forward(mcopy, &destmtu); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 9663c93..16b5506 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -308,12 +308,15 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, !in_hosteq(dst->sin_addr, ip->ip_dst))) rtcache_free(ro); - if ((rt = rtcache_validate(ro)) == NULL && - (rt = rtcache_update(ro, 1)) == NULL) { - dst = &u.dst4; - error = rtcache_setdst(ro, &u.dst); - if (error != 0) - goto bad; + rt = rtcache_validate(ro); + if (rt == NULL) { + rt = rtcache_update(ro, 1); + if (rt == NULL) { + dst = &u.dst4; + error = rtcache_setdst(ro, &u.dst); + if (error != 0) + goto bad; + } } bound = curlwp_bind(); @@ -361,10 +364,13 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, error = EHOSTUNREACH; goto bad; } - /* - * XXX NOMPSAFE: depends on accessing rt->rt_ifa isn't racy. - * Revisit when working on rtentry MP-ification. - */ + if (ifa_is_destroying(rt->rt_ifa)) { + rtcache_unref(rt, ro); + rt = NULL; /* already released above */ + IP_STATINC(IP_STAT_NOROUTE); + error = EHOSTUNREACH; + goto bad; + } ifa_acquire(rt->rt_ifa, &psref_ia); ia = ifatoia(rt->rt_ifa); ifp = rt->rt_ifp; @@ -765,6 +771,7 @@ sendit: } done: ia4_release(ia, &psref_ia); + rtcache_unref(rt, ro); if (ro == &iproute) { rtcache_free(&iproute); } @@ -1543,6 +1550,7 @@ ip_get_membership(const struct sockopt *sopt, struct ifnet **ifp, if (error != 0) return error; *ifp = (rt = rtcache_init(&ro)) != NULL ?
rt->rt_ifp : NULL; + rtcache_unref(rt, &ro); rtcache_free(&ro); } else { *ifp = ip_multicast_if(&mreq.imr_interface, NULL); diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 504d25f..dbd6e85 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -245,7 +245,7 @@ int ip_if_output(struct ifnet * const, struct mbuf * const, /* IP Flow interface. */ void ipflow_init(void); void ipflow_poolinit(void); -void ipflow_create(const struct route *, struct mbuf *); +void ipflow_create(struct route *, struct mbuf *); void ipflow_slowtimo(void); int ipflow_invalidate_all(int); diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index f3b271f..e739a65 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -1083,6 +1083,7 @@ sctp_ipv4_source_address_selection(struct sctp_inpcb *inp, const struct sockaddr_in *to; struct rtentry *rt; uint8_t ipv4_scope, loopscope; + /* * Rules: * - Find the route if needed, cache if I can. @@ -1176,8 +1177,9 @@ sctp_ipv4_source_address_selection(struct sctp_inpcb *inp, * it is a negative list. Addresses being added * by asconf. */ - return (sctp_choose_v4_boundall(inp, stcb, net, rt, - ipv4_scope, loopscope, non_asoc_addr_ok)); + ans = sctp_choose_v4_boundall(inp, stcb, net, rt, + ipv4_scope, loopscope, non_asoc_addr_ok); + goto out; } /* * Three possiblities here: @@ -1199,15 +1201,19 @@ sctp_ipv4_source_address_selection(struct sctp_inpcb *inp, * the v6 address selection. 
*/ if (stcb) { - return (sctp_choose_v4_boundspecific_stcb(inp, stcb, net, - rt, ipv4_scope, loopscope, non_asoc_addr_ok)); + ans = sctp_choose_v4_boundspecific_stcb(inp, stcb, net, + rt, ipv4_scope, loopscope, non_asoc_addr_ok); + goto out; } else { - return (sctp_choose_v4_boundspecific_inp(inp, rt, - ipv4_scope, loopscope)); + ans = sctp_choose_v4_boundspecific_inp(inp, rt, + ipv4_scope, loopscope); + goto out; } /* this should not be reached */ memset(&ans, 0, sizeof(ans)); - return (ans); +out: + rtcache_unref(rt, ro); + return ans; } @@ -1980,6 +1986,7 @@ sctp_ipv6_source_address_selection(struct sctp_inpcb *inp, /* we can't have a non-asoc address since we have no association */ rt_addr = sctp_choose_v6_boundspecific_inp(inp, rt, loc_scope, loopscope); } + rtcache_unref(rt, ro); if (rt_addr == NULL) { /* no suitable address? */ struct in6_addr in6; @@ -2150,7 +2157,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, if (net == NULL) { ro = &iproute; memset(&iproute, 0, sizeof(iproute)); - rtcache_lookup(ro, to); + /* XXX */ + rt = rtcache_lookup(ro, to); + rtcache_unref(rt, ro); } else { ro = (struct route *)&net->ro; } @@ -2164,9 +2173,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, ((struct sockaddr_in *)&net->_s_addr)->sin_addr = sctp_ipv4_source_address_selection(inp, stcb, ro, net, out_of_asoc_ok); - if (rtcache_validate(ro)) { + rt = rtcache_validate(ro); + if (rt != NULL) { net->src_addr_selected = 1; } + rtcache_unref(rt, ro); } ip->ip_src = ((struct sockaddr_in *)&net->_s_addr)->sin_addr; } else { @@ -2264,6 +2275,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, net->src_addr_selected = 0; } } + rtcache_unref(rt, ro); return (ret); } #ifdef INET6 @@ -2312,7 +2324,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, if (net == NULL) { memset(&ip6route, 0, sizeof(ip6route)); ro = (struct route *)&ip6route; - rtcache_lookup(ro, (struct sockaddr *) sin6); + /* XXX */ + rt = rtcache_lookup(ro, (struct sockaddr *) sin6); + 
rtcache_unref(rt, ro); } else { ro = (struct route *)&net->ro; } @@ -2429,6 +2443,8 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, prev_scope = sin6->sin6_scope_id; prev_port = sin6->sin6_port; } + /* XXX NOMPSAFE need to hold ifp here */ + rtcache_unref(rt, ro); ret = ip6_output(m, ((struct in6pcb *)inp)->in6p_outputopts, ro, o_flgs, @@ -2460,6 +2476,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, &stcb->asoc, rt->rt_rmx.rmx_mtu); } + rtcache_unref(rt, ro); } else if (ifp) { if (ND_IFINFO(ifp)->linkmtu && (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) { @@ -3252,6 +3269,7 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int cnt_inits_to=0; uint16_t his_limit, i_want; int abort_flag, padval, sz_of; + struct rtentry *rt; if (stcb) { asoc = &stcb->asoc; @@ -3402,7 +3420,9 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, memset(&iproute, 0, sizeof(iproute)); ro = &iproute; - rtcache_lookup(ro, (struct sockaddr *) sin); + /* XXX */ + rt = rtcache_lookup(ro, (struct sockaddr *) sin); + rtcache_unref(rt, ro); addr = sctp_ipv4_source_address_selection(inp, NULL, ro, NULL, 0); stc.laddress[0] = addr.s_addr; @@ -3488,7 +3508,9 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, /* local from address */ memset(&iproute6, 0, sizeof(iproute6)); ro = (struct route *)&iproute6; - rtcache_lookup(ro, (struct sockaddr *) sin6); + /* XXX */ + rt = rtcache_lookup(ro, (struct sockaddr *) sin6); + rtcache_unref(rt, ro); addr = sctp_ipv6_source_address_selection(inp, NULL, ro, NULL, 0); memcpy(&stc.laddress, &addr, sizeof(struct in6_addr)); @@ -5401,6 +5423,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, return (0); } TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + struct rtentry *rt; /* how much can we send? 
*/ if (net->ref_count < 2) { /* Ref-count of 1 so we cannot have data or control @@ -5413,7 +5436,8 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, no_fragmentflg = 1; one_chunk = 0; - if (rtcache_validate(&net->ro)) { + rt = rtcache_validate(&net->ro); + if (rt != NULL) { /* if we have a route and an ifp * check to see if we have room to * send to this guy @@ -5425,8 +5449,10 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, #ifdef SCTP_LOG_MAXBURST sctp_log_maxburst(net, ifp->if_snd.ifq_len, ifp->if_snd.ifq_maxlen, SCTP_MAX_IFP_APPLIED); #endif + rtcache_unref(rt, &net->ro); continue; } + rtcache_unref(rt, &net->ro); } if (((struct sockaddr *)&net->ro.ro_sa)->sa_family == AF_INET) { mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr)); diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index 70c4f3a..03866f0 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -2061,6 +2061,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate) struct socket *so; struct sctp_socket_q_list *sq; int s, cnt; + struct rtentry *rt; s = splsoftnet(); SCTP_ASOC_CREATE_LOCK(inp); @@ -2173,7 +2174,9 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate) #endif inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_ALLGONE; - rtcache_validate(&ip_pcb->inp_route); + /* XXX */ + rt = rtcache_validate(&ip_pcb->inp_route); + rtcache_unref(rt, &ip_pcb->inp_route); callout_stop(&inp->sctp_ep.signature_change.timer); callout_destroy(&inp->sctp_ep.signature_change.timer); @@ -2608,6 +2611,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); } else if (rt->rt_ifp != netfirst_rt->rt_ifp) { + rtcache_unref(netfirst_rt, &netfirst->ro); /* * This one has a different interface than the one at the * top of the list. Place it ahead. 
@@ -2635,13 +2639,16 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, TAILQ_INSERT_BEFORE(netfirst, net, sctp_next); break; } else if (netlook_rt->rt_ifp != rt->rt_ifp) { + rtcache_unref(netlook_rt, &netlook->ro); TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook, net, sctp_next); break; } + rtcache_unref(netlook_rt, &netlook->ro); /* Shift forward */ netfirst = netlook; } while (netlook != NULL); + rtcache_unref(netfirst_rt, &netfirst->ro); } /* got to have a primary set */ if (stcb->asoc.primary_destination == 0) { diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c index ad26fad..f90f247 100644 --- a/sys/netinet/sctp_timer.c +++ b/sys/netinet/sctp_timer.c @@ -247,8 +247,10 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) ) { /* Found a reachable address */ + rtcache_unref(rt, &alt->ro); break; } + rtcache_unref(rt, &alt->ro); mnet = alt; } while (alt != NULL); @@ -1346,6 +1348,7 @@ void sctp_pathmtu_timer(struct sctp_inpcb *inp, net->mtu = next_mtu; } } + rtcache_unref(rt, &net->ro); } /* restart the timer */ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index d0b077e..538e5d0 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -262,11 +262,13 @@ static struct timeval tcp_ackdrop_ppslim_last; static inline void nd6_hint(struct tcpcb *tp) { - struct rtentry *rt; + struct rtentry *rt = NULL; if (tp != NULL && tp->t_in6pcb != NULL && tp->t_family == AF_INET6 && - (rt = rtcache_validate(&tp->t_in6pcb->in6p_route)) != NULL) + (rt = rtcache_validate(&tp->t_in6pcb->in6p_route)) != NULL) { nd6_nud_hint(rt); + rtcache_unref(rt, &tp->t_in6pcb->in6p_route); + } } #else static inline void @@ -4528,7 +4530,7 @@ int syn_cache_respond(struct syn_cache *sc, struct mbuf *m) { #ifdef INET6 - struct rtentry *rt; + struct rtentry *rt = NULL; #endif struct route *ro; u_int8_t *optp; @@ -4809,6 +4811,7 @@ syn_cache_respond(struct syn_cache *sc, struct mbuf *m) case AF_INET6: 
ip6->ip6_hlim = in6_selecthlim(NULL, (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp : NULL); + rtcache_unref(rt, ro); error = ip6_output(m, NULL /*XXX*/, ro, 0, NULL, so, NULL); break; diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 10e7f09..e131cbe 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -338,6 +338,14 @@ tcp_segsize(struct tcpcb *tp, int *txsegsizep, int *rxsegsizep, } } #endif +#ifdef INET + if (inp) + in_pcbrtentry_unref(rt, inp); +#endif +#ifdef INET6 + if (in6p) + in6_pcbrtentry_unref(rt, in6p); +#endif out: /* * Now we must make room for whatever extra TCP/IP options are in @@ -547,7 +555,7 @@ tcp_build_datapkt(struct tcpcb *tp, struct socket *so, int off, int tcp_output(struct tcpcb *tp) { - struct rtentry *rt; + struct rtentry *rt = NULL; struct socket *so; struct route *ro; long len, win; @@ -638,6 +646,10 @@ tcp_output(struct tcpcb *tp) #endif (rt = rtcache_validate(&tp->t_inpcb->inp_route)) != NULL && (rt->rt_ifp->if_capenable & IFCAP_TSOv4) != 0; + if (rt != NULL) { + rtcache_unref(rt, &tp->t_inpcb->inp_route); + rt = NULL; + } #endif /* defined(INET) */ #if defined(INET6) has_tso6 = tp->t_in6pcb != NULL && @@ -647,6 +659,8 @@ tcp_output(struct tcpcb *tp) #endif (rt = rtcache_validate(&tp->t_in6pcb->in6p_route)) != NULL && (rt->rt_ifp->if_capenable & IFCAP_TSOv6) != 0; + if (rt != NULL) + rtcache_unref(rt, &tp->t_in6pcb->in6p_route); #endif /* defined(INET6) */ has_tso = (has_tso4 || has_tso6) && !alwaysfrag; @@ -1134,6 +1148,14 @@ send: tp->snd_nxt = tp->iss; tp->t_ourmss = tcp_mss_to_advertise(synrt != NULL ? 
synrt->rt_ifp : NULL, af); +#ifdef INET + if (tp->t_inpcb) + in_pcbrtentry_unref(synrt, tp->t_inpcb); +#endif +#ifdef INET6 + if (tp->t_in6pcb) + in6_pcbrtentry_unref(synrt, tp->t_in6pcb); +#endif if ((tp->t_flags & TF_NOOPT) == 0 && OPT_FITS(4)) { opt[0] = TCPOPT_MAXSEG; opt[1] = 4; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 812c2df..b4655a5 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1156,7 +1156,7 @@ tcp_close(struct tcpcb *tp) #endif struct socket *so; #ifdef RTV_RTT - struct rtentry *rt; + struct rtentry *rt = NULL; #endif struct route *ro; int j; @@ -1245,6 +1245,7 @@ tcp_close(struct tcpcb *tp) rt->rt_rmx.rmx_ssthresh = i; } } + rtcache_unref(rt, ro); #endif /* RTV_RTT */ /* free the reassembly queue, if any */ TCP_REASS_LOCK(tp); @@ -1774,6 +1775,7 @@ tcp_mtudisc(struct inpcb *inp, int errno) * If this was not a host route, remove and realloc. */ if ((rt->rt_flags & RTF_HOST) == 0) { + in_pcbrtentry_unref(rt, inp); in_rtchange(inp, errno); if ((rt = in_pcbrtentry(inp)) == NULL) return; @@ -1791,6 +1793,7 @@ tcp_mtudisc(struct inpcb *inp, int errno) tp->snd_cwnd = TCP_INITIAL_WINDOW(tcp_init_win, rt->rt_rmx.rmx_mtu); + in_pcbrtentry_unref(rt, inp); } /* @@ -1833,6 +1836,7 @@ tcp6_mtudisc(struct in6pcb *in6p, int errno) * If this was not a host route, remove and realloc. 
*/ if ((rt->rt_flags & RTF_HOST) == 0) { + in6_pcbrtentry_unref(rt, in6p); in6_rtchange(in6p, errno); rt = in6_pcbrtentry(in6p); if (rt == NULL) @@ -1851,6 +1855,7 @@ tcp6_mtudisc(struct in6pcb *in6p, int errno) tp->snd_cwnd = TCP_INITIAL_WINDOW(tcp_init_win, rt->rt_rmx.rmx_mtu); } + in6_pcbrtentry_unref(rt, in6p); } /* @@ -2039,6 +2044,16 @@ tcp_mss_from_peer(struct tcpcb *tp, int offer) tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); } #endif +#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH) +#ifdef INET + if (tp->t_inpcb) + in_pcbrtentry_unref(rt, tp->t_inpcb); +#endif +#ifdef INET6 + if (tp->t_in6pcb) + in6_pcbrtentry_unref(rt, tp->t_in6pcb); +#endif +#endif } /* @@ -2135,6 +2150,16 @@ tcp_established(struct tcpcb *tp) bufsize = sb_max; (void) sbreserve(&so->so_rcv, bufsize, so); } +#ifdef RTV_RPIPE +#ifdef INET + if (tp->t_inpcb) + in_pcbrtentry_unref(rt, tp->t_inpcb); +#endif +#ifdef INET6 + if (tp->t_in6pcb) + in6_pcbrtentry_unref(rt, tp->t_in6pcb); +#endif +#endif } /* @@ -2188,6 +2213,14 @@ tcp_rmx_rtt(struct tcpcb *tp) ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2), tp->t_rttmin, TCPTV_REXMTMAX); } +#ifdef INET + if (tp->t_inpcb) + in_pcbrtentry_unref(rt, tp->t_inpcb); +#endif +#ifdef INET6 + if (tp->t_in6pcb) + in6_pcbrtentry_unref(rt, tp->t_in6pcb); +#endif #endif } diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index a8af59e..f6f99a4 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -182,6 +182,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) IP6_STATINC(IP6_STAT_REASSEMBLED); in6_ifstat_inc(dstifp, ifs6_reass_ok); *offp = offset; + rtcache_unref(rt, &ro); return ip6f->ip6f_nxt; } @@ -463,6 +464,7 @@ insert: IP6_STATINC(IP6_STAT_REASSEMBLED); in6_ifstat_inc(dstifp, ifs6_reass_ok); + rtcache_unref(rt, &ro); /* * Tell launch routine the next header @@ -480,6 +482,7 @@ insert: IP6_STATINC(IP6_STAT_FRAGDROPPED); m_freem(m); done: + rtcache_unref(rt, &ro); return IPPROTO_DONE; } diff --git a/sys/netinet6/icmp6.c 
b/sys/netinet6/icmp6.c index beee94e..03fa137 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1164,7 +1164,7 @@ icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) } } if (rt) { - rtfree(rt); + rt_unref(rt); } /* @@ -2259,7 +2259,7 @@ icmp6_redirect_input(struct mbuf *m, int off) "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6)); - rtfree(rt); + rt_unref(rt); goto bad; } @@ -2270,7 +2270,7 @@ icmp6_redirect_input(struct mbuf *m, int off) "not equal to gw-for-src=%s (must be same): %s\n", ip6_sprintf(gw6), icmp6_redirect_diag(&src6, &reddst6, &redtgt6)); - rtfree(rt); + rt_unref(rt); goto bad; } } else { @@ -2279,7 +2279,7 @@ icmp6_redirect_input(struct mbuf *m, int off) icmp6_redirect_diag(&src6, &reddst6, &redtgt6)); goto bad; } - rtfree(rt); + rt_unref(rt); rt = NULL; } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { @@ -2373,7 +2373,7 @@ icmp6_redirect_input(struct mbuf *m, int off) if (newrt) { (void)rt_timer_add(newrt, icmp6_redirect_timeout, icmp6_redirect_timeout_q); - rtfree(newrt); + rt_unref(newrt); } } /* finally update cached route in each socket via pfctlinput */ @@ -2758,17 +2758,17 @@ icmp6_mtudisc_clone(struct sockaddr *dst) error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL, RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt); if (error) { - rtfree(rt); + rt_unref(rt); return NULL; } nrt->rt_rmx = rt->rt_rmx; - rtfree(rt); + rt_unref(rt); rt = nrt; } error = rt_timer_add(rt, icmp6_mtudisc_timeout, icmp6_mtudisc_timeout_q); if (error) { - rtfree(rt); + rt_unref(rt); return NULL; } diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 50f5d38..4bbb504 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -1139,7 +1139,7 @@ in6_update_ifa1(struct ifnet *ifp, struct in6_aliasreq *ifra, if (memcmp(&mltaddr.sin6_addr, &satocsin6(rt_getkey(rt))->sin6_addr, MLTMASK_LEN)) { - rtfree(rt); + rt_unref(rt); rt = NULL; } else if (rt->rt_ifp != ifp) { 
IN6_DPRINTF("%s: rt_ifp %p -> %p (%s) " @@ -1167,7 +1167,7 @@ in6_update_ifa1(struct ifnet *ifp, struct in6_aliasreq *ifra, if (error) goto cleanup; } else { - rtfree(rt); + rt_unref(rt); } imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (!imm) { @@ -1220,7 +1220,7 @@ in6_update_ifa1(struct ifnet *ifp, struct in6_aliasreq *ifra, if (memcmp(&mltaddr.sin6_addr, &satocsin6(rt_getkey(rt))->sin6_addr, 32 / NBBY)) { - rtfree(rt); + rt_unref(rt); rt = NULL; } else if (rt->rt_ifp != ifp) { IN6_DPRINTF("%s: rt_ifp %p -> %p (%s) " @@ -1248,7 +1248,7 @@ in6_update_ifa1(struct ifnet *ifp, struct in6_aliasreq *ifra, goto cleanup; #undef MLTMASK_LEN } else { - rtfree(rt); + rt_unref(rt); } imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (!imm) { @@ -1333,6 +1333,10 @@ in6_purgeaddr(struct ifaddr *ifa) struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; struct in6_multi_mship *imm; + KASSERT(!ifa_held(ifa)); + + ifa->ifa_flags |= IFA_DESTROYING; + /* stop DAD processing */ nd6_dad_stop(ifa); @@ -2411,17 +2415,17 @@ in6_lltable_rtcheck(struct ifnet *ifp, if (ifa != NULL) { pserialize_read_exit(s); if (rt != NULL) - rtfree(rt); + rt_unref(rt); return 0; } pserialize_read_exit(s); log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", ip6_sprintf(&((const struct sockaddr_in6 *)l3addr)->sin6_addr)); if (rt != NULL) - rtfree(rt); + rt_unref(rt); return EINVAL; } - rtfree(rt); + rt_unref(rt); return 0; } diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index 6472c42..dfb42d0 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -176,17 +176,20 @@ in6_gif_output(struct ifnet *ifp, int family, struct mbuf *m) ip6->ip6_flow |= htonl((u_int32_t)otos << 20); sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0); - if ((rt = rtcache_lookup(&sc->gif_ro, &u.dst)) == NULL) { + rt = rtcache_lookup(&sc->gif_ro, &u.dst); + if (rt == NULL) { m_freem(m); return ENETUNREACH; } /* If the route constitutes infinite encapsulation, punt. 
*/ if (rt->rt_ifp == ifp) { + rtcache_unref(rt, &sc->gif_ro); rtcache_free(&sc->gif_ro); m_freem(m); return ENETUNREACH; /* XXX */ } + rtcache_unref(rt, &sc->gif_ro); #ifdef IPV6_MINMTU /* @@ -330,10 +333,10 @@ gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc, ip6_sprintf(&u.sin6.sin6_addr)); #endif if (rt != NULL) - rtfree(rt); + rt_unref(rt); return 0; } - rtfree(rt); + rt_unref(rt); } return 128 * 2; diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 4671674..abc0eb5 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -696,7 +696,6 @@ in6_pcbnotify(struct inpcbtable *table, const struct sockaddr *dst, u_int fport_arg, const struct sockaddr *src, u_int lport_arg, int cmd, void *cmdarg, void (*notify)(struct in6pcb *, int)) { - struct rtentry *rt; struct inpcb_hdr *inph, *ninph; struct sockaddr_in6 sa6_src; const struct sockaddr_in6 *sa6_dst; @@ -738,6 +737,8 @@ in6_pcbnotify(struct inpcbtable *table, const struct sockaddr *dst, errno = inet6ctlerrmap[cmd]; TAILQ_FOREACH_SAFE(inph, &table->inpt_queue, inph_queue, ninph) { struct in6pcb *in6p = (struct in6pcb *)inph; + struct rtentry *rt = NULL; + if (in6p->in6p_af != AF_INET6) continue; @@ -783,9 +784,12 @@ in6_pcbnotify(struct inpcbtable *table, const struct sockaddr *dst, if (dst6 == NULL) ; else if (IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, - &sa6_dst->sin6_addr)) + &sa6_dst->sin6_addr)) { + rtcache_unref(rt, &in6p->in6p_route); goto do_notify; + } } + rtcache_unref(rt, &in6p->in6p_route); /* * If the error designates a new path MTU for a destination @@ -887,8 +891,11 @@ in6_pcbpurgeif(struct inpcbtable *table, struct ifnet *ifp) if (in6p->in6p_af != AF_INET6) continue; if ((rt = rtcache_validate(&in6p->in6p_route)) != NULL && - rt->rt_ifp == ifp) + rt->rt_ifp == ifp) { + rtcache_unref(rt, &in6p->in6p_route); in6_rtchange(in6p, 0); + } else + rtcache_unref(rt, &in6p->in6p_route); } } @@ -916,9 +923,16 @@ in6_losing(struct in6pcb *in6p) info.rti_info[RTAX_NETMASK] = 
rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) { - (void)rtrequest(RTM_DELETE, rt_getkey(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); - } + int error; + struct rtentry *nrt; + + error = rtrequest(RTM_DELETE, rt_getkey(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, &nrt); + rtcache_unref(rt, &in6p->in6p_route); + if (error == 0) + rt_free(nrt); + } else + rtcache_unref(rt, &in6p->in6p_route); /* * A new route can be allocated * the next time output is attempted. @@ -1151,6 +1165,13 @@ in6_pcbrtentry(struct in6pcb *in6p) return rt; } +void +in6_pcbrtentry_unref(struct rtentry *rt, struct in6pcb *in6p) +{ + + rtcache_unref(rt, &in6p->in6p_route); +} + struct in6pcb * in6_pcblookup_connect(struct inpcbtable *table, const struct in6_addr *faddr6, u_int fport_arg, const struct in6_addr *laddr6, u_int lport_arg, diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h index 08b6613..0e7a0f9 100644 --- a/sys/netinet6/in6_pcb.h +++ b/sys/netinet6/in6_pcb.h @@ -179,6 +179,8 @@ int in6_pcbsetport(struct sockaddr_in6 *, struct in6pcb *, struct lwp *); extern struct rtentry * in6_pcbrtentry(struct in6pcb *); +extern void + in6_pcbrtentry_unref(struct rtentry *, struct in6pcb *); extern struct in6pcb *in6_pcblookup_connect(struct inpcbtable *, const struct in6_addr *, u_int, const struct in6_addr *, u_int, int, struct vestigial_inpcb *); diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index e5d5be2..b8f66a9 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -599,6 +599,7 @@ in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } u; KASSERT(ro != NULL); + KASSERT(*ro != NULL); KASSERT(retrt != NULL); #if 0 @@ -638,10 +639,14 @@ in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, rt = rtcache_lookup(ron, sin6tosa(sin6_next)); if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) != 0 || !nd6_is_addr_neighbor(sin6_next, rt->rt_ifp)) { + if (rt != NULL) { + 
if (count_discard) + in6_ifstat_inc(rt->rt_ifp, + ifs6_out_discard); + rtcache_unref(rt, ron); + rt = NULL; + } rtcache_free(ron); - if (rt != NULL && count_discard) - in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); - rt = NULL; error = EHOSTUNREACH; goto done; } @@ -657,8 +662,8 @@ in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ u.dst6 = *dstsock; u.dst6.sin6_scope_id = 0; + rt = rtcache_lookup1(*ro, &u.dst, 1); - if (rt == NULL) error = EHOSTUNREACH; @@ -691,7 +696,7 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route *ro, struct ifnet **retifp, struct psref *psref) { - int error; + int error = 0; struct rtentry *rt = NULL; struct in6_addr *dst; struct in6_pktinfo *pi = NULL; @@ -743,8 +748,11 @@ getroute: * Although this may not be very harmful, it should still be confusing. * We thus reject the case here. */ - if ((rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) - return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + if ((rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { + error = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + /* XXX: ifp can be returned with psref even if error */ + goto out; + } /* * Adjust the "outgoing" interface. 
If we're going to loop the packet @@ -760,8 +768,9 @@ getroute: *retifp = rt->rt_ifa->ifa_ifp; if_acquire_NOMPSAFE(*retifp, psref); } - - return (0); +out: + rtcache_unref(rt, ro); + return error; } /* @@ -791,9 +800,11 @@ in6_selecthlim_rt(struct in6pcb *in6p) return in6_selecthlim(in6p, NULL); rt = rtcache_validate(&in6p->in6p_route); - if (rt != NULL) - return in6_selecthlim(in6p, rt->rt_ifp); - else + if (rt != NULL) { + int ret = in6_selecthlim(in6p, rt->rt_ifp); + rtcache_unref(rt, &in6p->in6p_route); + return ret; + } else return in6_selecthlim(in6p, NULL); } diff --git a/sys/netinet6/ip6_etherip.c b/sys/netinet6/ip6_etherip.c index 4972b21..01362c6 100644 --- a/sys/netinet6/ip6_etherip.c +++ b/sys/netinet6/ip6_etherip.c @@ -172,10 +172,12 @@ ip6_etherip_output(struct ifnet *ifp, struct mbuf *m) } /* if it constitutes infinite encapsulation, punt. */ if (rt->rt_ifp == ifp) { + rtcache_unref(rt, &sc->sc_ro); rtcache_free(&sc->sc_ro); m_freem(m); return ENETUNREACH; /* XXX */ } + rtcache_unref(rt, &sc->sc_ro); /* * force fragmentation to minimum MTU, to avoid path MTU discovery. diff --git a/sys/netinet6/ip6_flow.c b/sys/netinet6/ip6_flow.c index 813aeae..be9552e 100644 --- a/sys/netinet6/ip6_flow.c +++ b/sys/netinet6/ip6_flow.c @@ -259,7 +259,7 @@ ip6flow_fastforward(struct mbuf **mp) { struct ip6flow *ip6f; struct ip6_hdr *ip6; - struct rtentry *rt; + struct rtentry *rt = NULL; struct mbuf *m; const struct sockaddr *dst; int error; @@ -327,14 +327,14 @@ ip6flow_fastforward(struct mbuf **mp) if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL || (rt->rt_ifp->if_flags & IFF_UP) == 0 || (rt->rt_flags & RTF_BLACKHOLE) != 0) - goto out; + goto out_unref; /* * Packet size greater than MTU? */ if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) { /* Return to main IPv6 input function. 
*/ - goto out; + goto out_unref; } /* @@ -343,7 +343,7 @@ ip6flow_fastforward(struct mbuf **mp) m->m_pkthdr.csum_flags = 0; if (ip6->ip6_hlim <= IPV6_HLIMDEC) - goto out; + goto out_unref; /* Decrement hop limit (same as TTL) */ ip6->ip6_hlim -= IPV6_HLIMDEC; @@ -373,7 +373,9 @@ ip6flow_fastforward(struct mbuf **mp) ip6f->ip6f_forwarded++; } ret = 1; - out: +out_unref: + rtcache_unref(rt, &ip6f->ip6f_ro); +out: mutex_exit(&ip6flow_lock); return ret; } @@ -382,12 +384,11 @@ ip6flow_fastforward(struct mbuf **mp) * Add the IPv6 flow statistics to the main IPv6 statistics. */ static void -ip6flow_addstats(const struct ip6flow *ip6f) +ip6flow_addstats_rt(struct rtentry *rt, struct ip6flow *ip6f) { - struct rtentry *rt; uint64_t *ip6s; - if ((rt = rtcache_validate(&ip6f->ip6f_ro)) != NULL) + if (rt != NULL) rt->rt_use += ip6f->ip6f_uses; ip6s = IP6_STAT_GETREF(); ip6s[IP6_STAT_FASTFORWARDFLOWS] = ip6flow_inuse; @@ -399,6 +400,16 @@ ip6flow_addstats(const struct ip6flow *ip6f) IP6_STAT_PUTREF(); } +static void +ip6flow_addstats(struct ip6flow *ip6f) +{ + struct rtentry *rt; + + rt = rtcache_validate(&ip6f->ip6f_ro); + ip6flow_addstats_rt(rt, ip6f); + rtcache_unref(rt, &ip6f->ip6f_ro); +} + /* * Add statistics and free the flow. */ @@ -452,12 +463,14 @@ ip6flow_reap_locked(int just_one) struct ip6flow *maybe_ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead); TAILQ_FOREACH(ip6f, &ip6flowlist, ip6f_list) { + struct rtentry *rt; /* * If this no longer points to a valid route - * reclaim it. 
*/ - if (rtcache_validate(&ip6f->ip6f_ro) == NULL) + if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL) goto done; + rtcache_unref(rt, &ip6f->ip6f_ro); /* * choose the one that's been least recently * used or has had the least uses in the @@ -516,17 +529,19 @@ ip6flow_slowtimo_work(struct work *wk, void *arg) mutex_enter(&ip6flow_lock); for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) { + struct rtentry *rt = NULL; next_ip6f = TAILQ_NEXT(ip6f, ip6f_list); if (PRT_SLOW_ISEXPIRED(ip6f->ip6f_timer) || - rtcache_validate(&ip6f->ip6f_ro) == NULL) { + (rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL) { ip6flow_free(ip6f); } else { ip6f->ip6f_last_uses = ip6f->ip6f_uses; - ip6flow_addstats(ip6f); + ip6flow_addstats_rt(rt, ip6f); ip6f->ip6f_uses = 0; ip6f->ip6f_dropped = 0; ip6f->ip6f_forwarded = 0; } + rtcache_unref(rt, &ip6f->ip6f_ro); } mutex_exit(&ip6flow_lock); @@ -552,7 +567,7 @@ ip6flow_slowtimo(void) * IPv6 stack. Now create/update a flow. */ void -ip6flow_create(const struct route *ro, struct mbuf *m) +ip6flow_create(struct route *ro, struct mbuf *m) { const struct ip6_hdr *ip6; struct ip6flow *ip6f; diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index 74ffb18..ff97021 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -128,7 +128,7 @@ ip6_forward(struct mbuf *m, int srcrt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); const struct sockaddr_in6 *dst; - struct rtentry *rt; + struct rtentry *rt = NULL; int error = 0, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ @@ -136,7 +136,7 @@ ip6_forward(struct mbuf *m, int srcrt) struct in6_addr src_in6, dst_in6; struct ifnet *rcvif = NULL; struct psref psref; - struct route *ro; + struct route *ro = NULL; #ifdef IPSEC int needipsec = 0; struct secpolicy *sp = NULL; @@ -213,14 +213,14 @@ ip6_forward(struct mbuf *m, int srcrt) } u; sockaddr_in6_init(&u.dst6, &ip6->ip6_dst, 0, 0, 0); - if ((rt = 
rtcache_lookup(ro, &u.dst)) == NULL) { + rt = rtcache_lookup(ro, &u.dst); + if (rt == NULL) { IP6_STATINC(IP6_STAT_NOROUTE); /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_noroute) */ if (mcopy) { icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } - percpu_putref(ip6_forward_rt_percpu); goto drop; } } else if ((rt = rtcache_validate(ro)) == NULL && @@ -235,11 +235,9 @@ ip6_forward(struct mbuf *m, int srcrt) icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } - percpu_putref(ip6_forward_rt_percpu); goto drop; } dst = satocsin6(rtcache_getdst(ro)); - percpu_putref(ip6_forward_rt_percpu); /* * Source scope check: if a packet can't be delivered to its @@ -324,11 +322,9 @@ ip6_forward(struct mbuf *m, int srcrt) */ if (rt->rt_ifp == rcvif && !srcrt && ip6_sendredirects && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { - ro = percpu_getref(ip6_forward_rt_percpu); if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) && nd6_is_addr_neighbor(satocsin6(rtcache_getdst(ro)), rt->rt_ifp)) { - percpu_putref(ip6_forward_rt_percpu); /* * If the incoming interface is equal to the outgoing * one, the link attached to the interface is @@ -348,7 +344,6 @@ ip6_forward(struct mbuf *m, int srcrt) ICMP6_DST_UNREACH_ADDR, 0); goto drop; } - percpu_putref(ip6_forward_rt_percpu); type = ND_REDIRECT; } @@ -417,10 +412,11 @@ ip6_forward(struct mbuf *m, int srcrt) IP6_STATINC(IP6_STAT_REDIRECTSENT); else { #ifdef GATEWAY - ro = percpu_getref(ip6_forward_rt_percpu); + /* Need to release rt here */ + rtcache_unref(rt, ro); + rt = NULL; if (m->m_flags & M_CANFASTFWD) ip6flow_create(ro, m); - percpu_putref(ip6_forward_rt_percpu); #endif if (mcopy) goto freecopy; @@ -464,6 +460,9 @@ ip6_forward(struct mbuf *m, int srcrt) drop: m_freem(m); out: + rtcache_unref(rt, ro); + if (ro != NULL) + percpu_putref(ip6_forward_rt_percpu); if (rcvif != NULL) m_put_rcvif_psref(rcvif, &psref); return; diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 
155d14f..8d2a5d8 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -269,7 +269,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) int nxt, ours = 0, rh_present = 0; struct ifnet *deliverifp = NULL; int srcrt = 0; - const struct rtentry *rt; + struct rtentry *rt = NULL; union { struct sockaddr dst; struct sockaddr_in6 dst6; @@ -454,6 +454,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) goto bad; } + ro = percpu_getref(ip6_forward_rt_percpu); /* * Multicast check */ @@ -474,7 +475,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) ip6s[IP6_STAT_CANTFORWARD]++; IP6_STAT_PUTREF(); in6_ifstat_inc(rcvif, ifs6_in_discard); - goto bad; + goto bad_unref; } deliverifp = rcvif; goto hbhcheck; @@ -485,9 +486,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) /* * Unicast check */ - ro = percpu_getref(ip6_forward_rt_percpu); rt = rtcache_lookup2(ro, &u.dst, 1, &hit); - percpu_putref(ip6_forward_rt_percpu); if (hit) IP6_STATINC(IP6_STAT_FORWARD_CACHEHIT); else @@ -533,7 +532,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst)); - goto bad; + goto bad_unref; } } @@ -579,7 +578,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) if (!ip6_forwarding) { IP6_STATINC(IP6_STAT_CANTFORWARD); in6_ifstat_inc(rcvif, ifs6_in_discard); - goto bad; + goto bad_unref; } hbhcheck: @@ -618,6 +617,8 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) #if 0 /*touches NULL pointer*/ in6_ifstat_inc(rcvif, ifs6_in_discard); #endif + rtcache_unref(rt, ro); + percpu_putref(ip6_forward_rt_percpu); return; /* m have already been freed */ } @@ -641,12 +642,16 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (char *)&ip6->ip6_plen - (char *)ip6); + rtcache_unref(rt, ro); + percpu_putref(ip6_forward_rt_percpu); return; } IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { IP6_STATINC(IP6_STAT_TOOSHORT); + 
rtcache_unref(rt, ro); + percpu_putref(ip6_forward_rt_percpu); return; } KASSERT(IP6_HDR_ALIGNED_P(hbh)); @@ -670,7 +675,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { IP6_STATINC(IP6_STAT_TOOSHORT); in6_ifstat_inc(rcvif, ifs6_in_truncated); - goto bad; + goto bad_unref; } if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { if (m->m_len == m->m_pkthdr.len) { @@ -700,13 +705,17 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) SOFTNET_UNLOCK(); if (error != 0) { + rtcache_unref(rt, ro); + percpu_putref(ip6_forward_rt_percpu); IP6_STATINC(IP6_STAT_CANTFORWARD); goto bad; } } if (!ours) - goto bad; + goto bad_unref; } else if (!ours) { + rtcache_unref(rt, ro); + percpu_putref(ip6_forward_rt_percpu); ip6_forward(m, srcrt); return; } @@ -726,7 +735,7 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { IP6_STATINC(IP6_STAT_BADSCOPE); in6_ifstat_inc(rcvif, ifs6_in_addrerr); - goto bad; + goto bad_unref; } /* @@ -746,6 +755,12 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) in6_ifstat_inc(deliverifp, ifs6_in_deliver); nest = 0; + if (rt != NULL) { + rtcache_unref(rt, ro); + rt = NULL; + } + percpu_putref(ip6_forward_rt_percpu); + rh_present = 0; while (nxt != IPPROTO_DONE) { if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) { @@ -797,8 +812,13 @@ ip6_input(struct mbuf *m, struct ifnet *rcvif) SOFTNET_UNLOCK(); } return; + + bad_unref: + rtcache_unref(rt, ro); + percpu_putref(ip6_forward_rt_percpu); bad: m_freem(m); + return; } /* diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index ed3d2ba..90f4171 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -695,8 +695,10 @@ ip6_output( sockaddr_in6_init(&u.dst6, &finaldst, 0, 0, 0); rt_pmtu = rtcache_lookup(ro_pmtu, &u.dst); } else - rt_pmtu = rtcache_validate(ro_pmtu); + rt_pmtu = rt; error = ip6_getpmtu(rt_pmtu, ifp, &mtu, &alwaysfrag); + if (rt_pmtu != NULL && rt_pmtu != rt) + 
rtcache_unref(rt_pmtu, ro_pmtu); if (error != 0) goto bad; @@ -1037,6 +1039,7 @@ sendorfree: IP6_STATINC(IP6_STAT_FRAGMENTED); done: + rtcache_unref(rt, ro); if (ro == &ip6route) rtcache_free(&ip6route); @@ -1869,6 +1872,7 @@ else \ sockaddr_in6_init(&u.dst6, &in6p->in6p_faddr, 0, 0, 0); rt = rtcache_lookup(ro, &u.dst); error = ip6_getpmtu(rt, NULL, &pmtu, NULL); + rtcache_unref(rt, ro); if (error) break; if (pmtu > IPV6_MAXPACKET) @@ -2405,7 +2409,10 @@ ip6_get_membership(const struct sockopt *sopt, struct ifnet **ifp, void *v, error = rtcache_setdst(&ro, &u.dst); if (error != 0) return error; - *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL; + rt = rtcache_init(&ro); + *ifp = rt != NULL ? rt->rt_ifp : NULL; + /* FIXME *ifp is NOMPSAFE */ + rtcache_unref(rt, &ro); rtcache_free(&ro); } else { /* diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index e8356a8..d30a538 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -377,7 +377,7 @@ void frag6_drainstub(void); int ip6flow_init(int); void ip6flow_poolinit(void); struct ip6flow *ip6flow_reap(int); -void ip6flow_create(const struct route *, struct mbuf *); +void ip6flow_create(struct route *, struct mbuf *); void ip6flow_slowtimo(void); int ip6flow_invalidate_all(int); diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 01c5939..200a914 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -998,10 +998,10 @@ nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) */ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &satocsin6(rt_getkey(rt))->sin6_addr)) { - rtfree(rt); + rt_unref(rt); continue; } - rtfree(rt); + rt_unref(rt); } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, @@ -1126,10 +1126,10 @@ nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) rt->rt_ifp->if_carpdev == ifp->if_carpdev) #endif )) { - rtfree(rt); + rt_unref(rt); return 1; } - rtfree(rt); + rt_unref(rt); return 0; } diff --git 
a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index d952a16..855bdf6 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -256,7 +256,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) } } if (rt) - rtfree(rt); + rt_unref(rt); } if (ifa == NULL) { /* diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 5cc5451..87ff8ad 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -929,6 +929,7 @@ ipsec4_forward(struct mbuf *m, int *destmtu) rt->rt_rmx.rmx_mtu : rt->rt_ifp->if_mtu; *destmtu -= ipsechdr; } + rtcache_unref(rt, ro); } KEY_FREESP(&sp); return 0;