diff --git a/sys/net/if_llatbl.c b/sys/net/if_llatbl.c index 743fba4..e02ee513 100644 --- a/sys/net/if_llatbl.c +++ b/sys/net/if_llatbl.c @@ -162,6 +162,8 @@ htable_link_entry(struct lltable *llt, struct llentry *lle) lle->lle_head = lleh; lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); + + llt->llt_lle_count++; } static void @@ -176,6 +178,8 @@ htable_unlink_entry(struct llentry *lle) lle->lle_tbl = NULL; lle->lle_head = NULL; #endif + KASSERT(lle->lle_tbl->llt_lle_count != 0); + lle->lle_tbl->llt_lle_count--; } } @@ -352,18 +356,16 @@ lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg) } /* - * Free all entries from given table and free itself. + * Free all entries from given table. */ void -lltable_free(struct lltable *llt) +lltable_purge_entries(struct lltable *llt) { struct llentry *lle, *next; struct llentries dchain; KASSERTMSG(llt != NULL, "llt is NULL"); - lltable_unlink(llt); - LIST_INIT(&dchain); IF_AFDATA_WLOCK(llt->llt_ifp); /* Push all lles to @dchain */ @@ -394,6 +396,19 @@ lltable_free(struct lltable *llt) llentry_free(lle); } +} + +/* + * Free all entries from given table and free itself. 
+ */ +void +lltable_free(struct lltable *llt) +{ + + KASSERTMSG(llt != NULL, "llt is NULL"); + + lltable_unlink(llt); + lltable_purge_entries(llt); llt->llt_free_tbl(llt); } diff --git a/sys/net/if_llatbl.h b/sys/net/if_llatbl.h index b5b7656..c657d861 100644 --- a/sys/net/if_llatbl.h +++ b/sys/net/if_llatbl.h @@ -101,7 +101,12 @@ struct llentry { #ifdef __NetBSD__ #define la_timer lle_timer +#define ln_timer_ch lle_timer +#define ln_expire la_expire +#define ln_asked la_asked +#define ln_hold la_hold struct rtentry *la_rt; +#define ln_rt la_rt void *la_opaque; /* For tokenring */ #endif }; @@ -240,6 +245,7 @@ struct lltable { int llt_af; int llt_hsize; struct llentries *lle_head; + unsigned int llt_lle_count; struct ifnet *llt_ifp; llt_lookup_t *llt_lookup; @@ -282,6 +288,7 @@ void lltable_link(struct lltable *llt); void lltable_prefix_free(int, struct sockaddr *, struct sockaddr *, u_int); void lltable_drain(int); +void lltable_purge_entries(struct lltable *); int lltable_sysctl_dumparp(int, struct sysctl_req *); size_t llentry_free(struct llentry *); @@ -299,6 +306,12 @@ void lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa); struct ifnet *lltable_get_ifp(const struct lltable *llt); int lltable_get_af(const struct lltable *llt); +static inline unsigned int +lltable_get_entry_count(struct lltable *llt) +{ + return llt->llt_lle_count; +} + int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg); /* diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index f45928d..1c6a684 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -83,16 +83,18 @@ __KERNEL_RCSID(0, "$NetBSD: in6.c,v 1.190 2015/08/24 22:21:27 pooka Exp $"); #include #include #include +#include #include #include -#include +#include +#include #include #include +#include #include #include -#include #include #include @@ -1761,6 +1763,34 @@ in6ifa_ifpforlinklocal(const struct ifnet *ifp, const int ignoreflags) return (struct in6_ifaddr *)best_ifa; } +/* + * 
find the internet address corresponding to a given address. + * ifaddr is returned referenced. + */ +struct in6_ifaddr * +in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid) +{ + struct in6_ifaddr *ia; + +#ifdef __FreeBSD__ + IN6_IFADDR_RLOCK(); + LIST_FOREACH(ia, IN6ADDR_HASH(addr), ia6_hash) { +#else + for (ia = in6_ifaddr; ia; ia = ia->ia_next) { +#endif + if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), addr)) { + if (zoneid != 0 && + zoneid != ia->ia_addr.sin6_scope_id) + continue; + ifaref(&ia->ia_ifa); + break; + } + } +#ifdef __FreeBSD__ + IN6_IFADDR_RUNLOCK(); +#endif + return ia; +} /* * find the internet address corresponding to a given interface and address. @@ -2196,6 +2226,287 @@ in6_if2idlen(struct ifnet *ifp) } } +struct in6_llentry { + struct llentry base; +}; + +#define IN6_LLTBL_DEFAULT_HSIZE 32 +#define IN6_LLTBL_HASH(k, h) \ + (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) + +/* + * Do actual deallocation of @lle. + * Called by LLE_FREE_LOCKED when number of references + * drops to zero. 
+ */ +static void +in6_lltable_destroy_lle(struct llentry *lle) +{ + + LLE_WUNLOCK(lle); + LLE_LOCK_DESTROY(lle); + kmem_intr_free(lle, sizeof(struct in6_llentry)); +} + +static struct llentry * +in6_lltable_new(const struct in6_addr *addr6, u_int flags) +{ + struct in6_llentry *lle; + + lle = kmem_intr_zalloc(sizeof(struct in6_llentry), KM_NOSLEEP); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + lle->base.r_l3addr.addr6 = *addr6; + lle->base.lle_refcnt = 1; + lle->base.lle_free = in6_lltable_destroy_lle; + LLE_LOCK_INIT(&lle->base); + callout_init(&lle->base.lle_timer, CALLOUT_MPSAFE); + + return &lle->base; +} + +static int +in6_lltable_match_prefix(const struct sockaddr *prefix, + const struct sockaddr *mask, u_int flags, struct llentry *lle) +{ + const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix; + const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask; + + if (IN6_ARE_MASKED_ADDR_EQUAL(&lle->r_l3addr.addr6, + &pfx->sin6_addr, &msk->sin6_addr) && + ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) + return 1; + + return 0; +} + +static void +in6_lltable_free_entry(struct lltable *llt, struct llentry *lle) +{ + struct ifnet *ifp; + + LLE_WLOCK_ASSERT(lle); + KASSERT(llt != NULL); + + /* Unlink entry from table */ + if ((lle->la_flags & LLE_LINKED) != 0) { + + ifp = llt->llt_ifp; + IF_AFDATA_WLOCK_ASSERT(ifp); + lltable_unlink_entry(llt, lle); + } + + KASSERT(mutex_owned(softnet_lock)); + callout_halt(&lle->lle_timer, softnet_lock); + LLE_REMREF(lle); + + llentry_free(lle); +} + +static int +in6_lltable_rtcheck(struct ifnet *ifp, + u_int flags, + const struct sockaddr *l3addr) +{ + struct rtentry *rt; + + KASSERTMSG(l3addr->sa_family == AF_INET6, + "sin_family %d", l3addr->sa_family); + + rt = rtalloc1(l3addr, 0); + if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { + struct ifaddr *ifa; + /* + * Create an ND6 cache for an IPv6 neighbor + * that is not covered by our own prefix. 
+ */ + /* XXX ifaof_ifpforaddr should take a const param */ + ifa = ifaof_ifpforaddr(l3addr, ifp); + if (ifa != NULL) { + ifafree(ifa); + if (rt != NULL) + rtfree(rt); + return 0; + } + log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", + ip6_sprintf(&((const struct sockaddr_in6 *)l3addr)->sin6_addr)); + if (rt != NULL) + rtfree(rt); + return EINVAL; + } + rtfree(rt); + return 0; +} + +static inline uint32_t +in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize) +{ + + return IN6_LLTBL_HASH(dst->s6_addr32[3], hsize); +} + +static uint32_t +in6_lltable_hash(const struct llentry *lle, uint32_t hsize) +{ + + return in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize); +} + +static void +in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) +{ + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + bzero(sin6, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_addr = lle->r_l3addr.addr6; +} + +static inline struct llentry * +in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst) +{ + struct llentry *lle; + struct llentries *lleh; + u_int hashidx; + + hashidx = in6_lltable_hash_dst(dst, llt->llt_hsize); + lleh = &llt->lle_head[hashidx]; + LIST_FOREACH(lle, lleh, lle_next) { + if (lle->la_flags & LLE_DELETED) + continue; + if (IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst)) + break; + } + + return lle; +} + +static int +in6_lltable_delete(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct llentry *lle; + + IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp); + KASSERTMSG(l3addr->sa_family == AF_INET6, + "sin_family %d", l3addr->sa_family); + + lle = in6_lltable_find_dst(llt, &sin6->sin6_addr); + + if (lle == NULL) + return ENOENT; + + if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { + LLE_WLOCK(lle); + lle->la_flags |= LLE_DELETED; +#ifdef DIAGNOSTIC + log(LOG_INFO, 
"ifaddr cache = %p is deleted\n", lle); +#endif + if ((lle->la_flags & (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) + llentry_free(lle); + else + LLE_WUNLOCK(lle); + } + + return 0; +} + +static struct llentry * +in6_lltable_create(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + + IF_AFDATA_WLOCK_ASSERT(ifp); + KASSERTMSG(l3addr->sa_family == AF_INET6, + "sin_family %d", l3addr->sa_family); + + lle = in6_lltable_find_dst(llt, &sin6->sin6_addr); + + if (lle != NULL) { + LLE_WLOCK(lle); + return lle; + } + + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. + */ + if (!(flags & LLE_IFADDR) && + in6_lltable_rtcheck(ifp, flags, l3addr) != 0) + return NULL; + + lle = in6_lltable_new(&sin6->sin6_addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + return NULL; + } + lle->la_flags = flags; + if ((flags & LLE_IFADDR) == LLE_IFADDR) { + memcpy(&lle->ll_addr, CLLADDR(ifp->if_sadl), ifp->if_addrlen); + lle->la_flags |= (LLE_VALID | LLE_STATIC); + } + + lltable_link_entry(llt, lle); + LLE_WLOCK(lle); + + return lle; +} + +static struct llentry * +in6_lltable_lookup(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct llentry *lle; + + IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); + KASSERTMSG(l3addr->sa_family == AF_INET6, + "sin_family %d", l3addr->sa_family); + + lle = in6_lltable_find_dst(llt, &sin6->sin6_addr); + + if (lle == NULL) + return NULL; + + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + return lle; +} + +static struct lltable * +in6_lltattach(struct ifnet *ifp) +{ + struct lltable *llt; + + llt = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE); + llt->llt_af = AF_INET6; + 
llt->llt_ifp = ifp; + + llt->llt_lookup = in6_lltable_lookup; + llt->llt_create = in6_lltable_create; + llt->llt_delete = in6_lltable_delete; +#if notyet + llt->llt_dump_entry = in6_lltable_dump_entry; +#endif + llt->llt_hash = in6_lltable_hash; + llt->llt_fill_sa_entry = in6_lltable_fill_sa_entry; + llt->llt_free_entry = in6_lltable_free_entry; + llt->llt_match_prefix = in6_lltable_match_prefix; + lltable_link(llt); + + return llt; +} + void * in6_domifattach(struct ifnet *ifp) { @@ -2213,6 +2524,9 @@ in6_domifattach(struct ifnet *ifp) ext->scope6_id = scope6_ifattach(ifp); ext->nprefixes = 0; ext->ndefrouters = 0; + + ext->lltable = in6_lltattach(ifp); + return ext; } @@ -2221,6 +2535,8 @@ in6_domifdetach(struct ifnet *ifp, void *aux) { struct in6_ifextra *ext = (struct in6_ifextra *)aux; + lltable_free(ext->lltable); + ext->lltable = NULL; nd6_ifdetach(ifp, ext); free(ext->in6_ifstat, M_IFADDR); free(ext->icmp6_ifstat, M_IFADDR); diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index dea8bba..71afbdd 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -89,6 +89,7 @@ struct in6_addrlifetime { u_int32_t ia6t_pltime; /* prefix lifetime */ }; +struct lltable; struct nd_ifinfo; struct in6_ifextra { struct in6_ifstat *in6_ifstat; @@ -97,6 +98,7 @@ struct in6_ifextra { struct scope6_id *scope6_id; int nprefixes; int ndefrouters; + struct lltable *lltable; }; LIST_HEAD(in6_multihead, in6_multi); @@ -700,6 +702,7 @@ void in6_purgemkludge(struct ifnet *); struct in6_ifaddr *in6ifa_ifpforlinklocal(const struct ifnet *, int); struct in6_ifaddr *in6ifa_ifpwithaddr(const struct ifnet *, const struct in6_addr *); +struct in6_ifaddr *in6ifa_ifwithaddr(const struct in6_addr *, uint32_t); char *ip6_sprintf(const struct in6_addr *); int in6_matchlen(struct in6_addr *, struct in6_addr *); int in6_are_prefix_equal(struct in6_addr *, struct in6_addr *, int); @@ -711,6 +714,9 @@ int ip6flow_fastforward(struct mbuf **); /* IPv6 fast forward routine */ int 
in6_src_ioctl(u_long, void *); int in6_is_addr_deprecated(struct sockaddr_in6 *); struct in6pcb; + +#define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable) + #endif /* _KERNEL */ #endif /* !_NETINET6_IN6_VAR_H_ */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 7a01a9d..19ce45c 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -59,6 +59,7 @@ __KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.180 2015/11/19 03:02:10 ozaki-r Exp $"); #include #include +#include #include #include #include @@ -104,10 +105,6 @@ int nd6_debug = 0; /* for debugging? */ static int nd6_inuse, nd6_allocated; -struct llinfo_nd6 llinfo_nd6 = { - .ln_prev = &llinfo_nd6, - .ln_next = &llinfo_nd6, -}; struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix = { 0 }; @@ -124,9 +121,9 @@ static const struct sockaddr_in6 all1_sa = { static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); -static struct llinfo_nd6 *nd6_free(struct rtentry *, int); +static void nd6_free(struct rtentry *, struct llentry *, int); static void nd6_llinfo_timer(void *); -static void clear_llinfo_pqueue(struct llinfo_nd6 *); +static void clear_llinfo_pqueue(struct llentry *); callout_t nd6_slowtimo_ch; callout_t nd6_timer_ch; @@ -137,16 +134,6 @@ static int fill_prlist(void *, size_t *, size_t); MALLOC_DEFINE(M_IP6NDP, "NDP", "IPv6 Neighbour Discovery"); -#define LN_DEQUEUE(ln) do { \ - (ln)->ln_next->ln_prev = (ln)->ln_prev; \ - (ln)->ln_prev->ln_next = (ln)->ln_next; \ - } while (/*CONSTCOND*/0) -#define LN_INSERTHEAD(ln) do { \ - (ln)->ln_next = llinfo_nd6.ln_next; \ - llinfo_nd6.ln_next = (ln); \ - (ln)->ln_prev = &llinfo_nd6; \ - (ln)->ln_next->ln_prev = (ln); \ - } while (/*CONSTCOND*/0) void nd6_init(void) { @@ -404,18 +391,18 @@ skip1: * ND6 timer routine to handle ND6 entries */ void -nd6_llinfo_settimer(struct llinfo_nd6 *ln, long xtick) +nd6_llinfo_settimer_locked(struct llentry *ln, long 
xtick) { - int s; - s = splsoftnet(); + LLE_WLOCK_ASSERT(ln); if (xtick < 0) { ln->ln_expire = 0; ln->ln_ntick = 0; - callout_stop(&ln->ln_timer_ch); + callout_halt(&ln->ln_timer_ch, &ln->lle_lock); } else { ln->ln_expire = time_uptime + xtick / hz; + LLE_ADDREF(ln); if (xtick > INT_MAX) { ln->ln_ntick = xtick - INT_MAX; callout_reset(&ln->ln_timer_ch, INT_MAX, @@ -426,8 +413,15 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, long xtick) nd6_llinfo_timer, ln); } } +} - splx(s); +void +nd6_llinfo_settimer(struct llentry *ln, long xtick) +{ + + LLE_WLOCK(ln); + nd6_llinfo_settimer_locked(ln, xtick); + LLE_WUNLOCK(ln); } /* @@ -436,7 +430,7 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, long xtick) * Returns pointer to @src (if hold queue is not empty) or NULL. */ static struct in6_addr * -nd6_llinfo_get_holdsrc(struct llinfo_nd6 *ln, struct in6_addr *src) +nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) { struct ip6_hdr *hip6; @@ -459,7 +453,7 @@ nd6_llinfo_get_holdsrc(struct llinfo_nd6 *ln, struct in6_addr *src) static void nd6_llinfo_timer(void *arg) { - struct llinfo_nd6 *ln; + struct llentry *ln = arg; struct rtentry *rt; const struct sockaddr_in6 *dst; struct ifnet *ifp; @@ -470,25 +464,43 @@ nd6_llinfo_timer(void *arg) mutex_enter(softnet_lock); KERNEL_LOCK(1, NULL); - ln = (struct llinfo_nd6 *)arg; - + LLE_WLOCK(ln); if (ln->ln_ntick > 0) { - nd6_llinfo_settimer(ln, ln->ln_ntick); - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); - return; + nd6_llinfo_settimer_locked(ln, ln->ln_ntick); + goto out; } + if (callout_pending(&ln->la_timer)) { + /* + * We are a bit odd here in the treatment of + * active/pending. If the pending bit is set, it got + * rescheduled before we ran. The active + * bit we ignore, since if it was stopped + * in ll_tablefree() and was currently running + * it would have returned 0 so the code would + * not have deleted it since the callout could + * not be stopped so we want to go through + * with the delete here now. 
If the callout + * was restarted, the pending bit will be back on and + * we just want to bail since the callout_reset would + * return 1 and our reference would have been removed + * by nd6_llinfo_settimer_locked above since canceled + * would have been 1. + */ + goto out; + } + + ifp = ln->lle_tbl->llt_ifp; rt = ln->ln_rt; + KASSERT(rt != NULL); - ifp = rt->rt_ifp; KASSERT(ifp != NULL); ndi = ND_IFINFO(ifp); dst = satocsin6(rt_getkey(rt)); /* sanity check */ - if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln) + if (rt->rt_llinfo && (struct llentry *)rt->rt_llinfo != ln) panic("rt_llinfo(%p) is not equal to ln(%p)", rt->rt_llinfo, ln); if (!dst) @@ -513,7 +525,7 @@ nd6_llinfo_timer(void *arg) ln->ln_hold = m0; clear_llinfo_pqueue(ln); } - (void)nd6_free(rt, 0); + nd6_free(rt, ln, 0); ln = NULL; if (m != NULL) icmp6_error2(m, ICMP6_DST_UNREACH, @@ -523,7 +535,7 @@ nd6_llinfo_timer(void *arg) case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(ln)) { ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)nd6_gctimer * hz); } break; @@ -531,7 +543,7 @@ nd6_llinfo_timer(void *arg) case ND6_LLINFO_STALE: /* Garbage Collection(RFC 2461 5.3) */ if (!ND6_LLINFO_PERMANENT(ln)) { - (void)nd6_free(rt, 1); + nd6_free(rt, ln, 1); ln = NULL; } break; @@ -545,7 +557,7 @@ nd6_llinfo_timer(void *arg) send_ns = true; } else { ln->ln_state = ND6_LLINFO_STALE; /* XXX */ - nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)nd6_gctimer * hz); } break; case ND6_LLINFO_PROBE: @@ -554,7 +566,7 @@ nd6_llinfo_timer(void *arg) daddr6 = &dst->sin6_addr; send_ns = true; } else { - (void)nd6_free(rt, 0); + nd6_free(rt, ln, 0); ln = NULL; } break; @@ -563,11 +575,16 @@ nd6_llinfo_timer(void *arg) if (send_ns) { struct in6_addr src, *psrc; - nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000); + nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); psrc = 
nd6_llinfo_get_holdsrc(ln, &src); + LLE_FREE_LOCKED(ln); + ln = NULL; nd6_ns_output(ifp, daddr6, &dst->sin6_addr, psrc, 0); } +out: + if (ln != NULL) + LLE_FREE_LOCKED(ln); KERNEL_UNLOCK_ONE(NULL); mutex_exit(softnet_lock); } @@ -786,7 +803,6 @@ nd6_accepts_rtadv(const struct nd_ifinfo *ndi) void nd6_purge(struct ifnet *ifp, struct in6_ifextra *ext) { - struct llinfo_nd6 *ln, *nln; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; @@ -862,26 +878,15 @@ nd6_purge(struct ifnet *ifp, struct in6_ifextra *ext) } /* - * Nuke neighbor cache entries for the ifp. - * Note that rt->rt_ifp may not be the same as ifp, - * due to KAME goto ours hack. See RTM_RESOLVE case in - * nd6_rtrequest(), and ip6_input(). + * We may not need to nuke the neighbor cache entries here + * because the neighbor cache is kept in if_afdata[AF_INET6]. + * nd6_purge() is invoked by in6_ifdetach() which is called + * from if_detach() where everything gets purged. However + * in6_ifdetach is directly called from vlan(4), so we still + * need to purge entries here. */ - ln = llinfo_nd6.ln_next; - while (ln != NULL && ln != &llinfo_nd6) { - struct rtentry *rt; - const struct sockaddr_dl *sdl; - - nln = ln->ln_next; - rt = ln->ln_rt; - if (rt && rt->rt_gateway && - rt->rt_gateway->sa_family == AF_LINK) { - sdl = satocsdl(rt->rt_gateway); - if (sdl->sdl_index == ifp->if_index) - nln = nd6_free(rt, 0); - } - ln = nln; - } + if (ext->lltable != NULL) + lltable_purge_entries(ext->lltable); } static struct rtentry * @@ -943,8 +948,7 @@ nd6_lookup1(const struct in6_addr *addr6, int create, struct ifnet *ifp, if (rt == NULL) return NULL; if (rt->rt_llinfo) { - struct llinfo_nd6 *ln = - (struct llinfo_nd6 *)rt->rt_llinfo; + struct llentry *ln = rt->rt_llinfo; ln->ln_state = ND6_LLINFO_NOSTATE; } } else @@ -1087,21 +1091,24 @@ nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. 
*/ -static struct llinfo_nd6 * -nd6_free(struct rtentry *rt, int gc) +static void +nd6_free(struct rtentry *rt, struct llentry *ln, int gc) { - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next; struct in6_addr in6 = satocsin6(rt_getkey(rt))->sin6_addr; struct nd_defrouter *dr; int error; + KASSERT(ln != NULL); + KASSERT(ln == rt->rt_llinfo); + LLE_WLOCK_ASSERT(ln); + /* * we used to have pfctlinput(PRC_HOSTDEAD) here. * even though it is not harmful, it was not really necessary. */ /* cancel timer */ - nd6_llinfo_settimer(ln, -1); + nd6_llinfo_settimer_locked(ln, -1); if (!ip6_forwarding) { int s; @@ -1124,12 +1131,13 @@ nd6_free(struct rtentry *rt, int gc) * but we intentionally keep it just in case. */ if (dr->expire > time_uptime) - nd6_llinfo_settimer(ln, + nd6_llinfo_settimer_locked(ln, (dr->expire - time_uptime) * hz); else - nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, + (long)nd6_gctimer * hz); splx(s); - return ln->ln_next; + return; } if (ln->ln_router || dr) { @@ -1173,14 +1181,7 @@ nd6_free(struct rtentry *rt, int gc) splx(s); } - /* - * Before deleting the entry, remember the next entry as the - * return value. We need this because pfxlist_onlink_check() above - * might have freed other entries (particularly the old next entry) as - * a side effect (XXX). - */ - next = ln->ln_next; - + LLE_WUNLOCK(ln); /* * Detach the route from the routing tree and the list of neighbor * caches, and disable the route entry not to be used in already @@ -1191,8 +1192,6 @@ nd6_free(struct rtentry *rt, int gc) if (error != 0) { /* XXX need error message? 
*/; } - - return next; } /* @@ -1203,7 +1202,7 @@ nd6_free(struct rtentry *rt, int gc) void nd6_nud_hint(struct rtentry *rt) { - struct llinfo_nd6 *ln; + struct llentry *ln; if (rt == NULL) return; @@ -1216,7 +1215,7 @@ nd6_nud_hint(struct rtentry *rt) return; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; if (ln->ln_state < ND6_LLINFO_REACHABLE) return; @@ -1237,14 +1236,54 @@ nd6_nud_hint(struct rtentry *rt) return; } +static int +nd6_purge_entry(struct lltable *llt, struct llentry *ln, void *farg) +{ + int *n = farg; + + if (*n <= 0) + return 0; + + if (ND6_LLINFO_PERMANENT(ln)) + return 0; + + LLE_WLOCK(ln); + if (ln->ln_state > ND6_LLINFO_INCOMPLETE) + ln->ln_state = ND6_LLINFO_STALE; + else + ln->ln_state = ND6_LLINFO_PURGE; + nd6_llinfo_settimer_locked(ln, 0); + LLE_WUNLOCK(ln); + + (*n)--; + return 0; +} + +static void +nd6_gc_neighbors(struct lltable *llt) +{ + int max_gc_entries = 10; + + if (ip6_neighborgcthresh >= 0 && + lltable_get_entry_count(llt) >= ip6_neighborgcthresh) { + /* + * XXX entries that are "less recently used" should be + * freed first. 
+ */ + lltable_foreach_lle(llt, nd6_purge_entry, &max_gc_entries); + } +} + void nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) { struct sockaddr *gate = rt->rt_gateway; - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; + struct llentry *ln; struct ifnet *ifp = rt->rt_ifp; uint8_t namelen = strlen(ifp->if_xname), addrlen = ifp->if_addrlen; struct ifaddr *ifa; + int flags = 0; + bool use_lo0ifp = false; RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); @@ -1292,6 +1331,10 @@ nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) return; } + IF_AFDATA_RLOCK(ifp); + ln = lla_lookup(LLTABLE6(ifp), flags, rt_getkey(rt)); + IF_AFDATA_RUNLOCK(ifp); + if (req == RTM_RESOLVE && (nd6_need_cache(ifp) == 0 || /* stf case */ !nd6_is_addr_neighbor(satocsin6(rt_getkey(rt)), ifp))) { @@ -1348,7 +1391,7 @@ nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) gate = rt->rt_gateway; RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); if (ln != NULL) - nd6_llinfo_settimer(ln, 0); + nd6_llinfo_settimer_locked(ln, 0); RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); if ((rt->rt_flags & RTF_CLONING) != 0) break; @@ -1404,23 +1447,53 @@ nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) if (ln != NULL) break; /* This happens on a route change */ RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); + + /* Decide whether to use lo0ifp before calling lla_create */ + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, + &satocsin6(rt_getkey(rt))->sin6_addr); + RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); + if (ifa != NULL && nd6_useloopback) + use_lo0ifp = true; + /* * Case 2: This route may come from cloning, or a manual route * add with a LL address. */ - R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln)); - rt->rt_llinfo = ln; + flags = LLE_EXCLUSIVE; + if ((rt->rt_flags & RTF_CLONED) == 0) + flags |= LLE_IFADDR; + +#define _IFP() (use_lo0ifp ? 
lo0ifp : ifp) + IF_AFDATA_WLOCK(_IFP()); + ln = lla_create(LLTABLE6(_IFP()), flags, rt_getkey(rt)); + IF_AFDATA_WUNLOCK(_IFP()); + RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); if (ln == NULL) { log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n"); break; } + RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); nd6_inuse++; nd6_allocated++; - memset(ln, 0, sizeof(*ln)); ln->ln_rt = rt; - callout_init(&ln->ln_timer_ch, CALLOUT_MPSAFE); + rt->rt_refcnt++; + rt->rt_llinfo = ln; + LLE_ADDREF(ln); + rt->rt_flags |= RTF_LLINFO; + switch (_IFP()->if_type) { +#if NTOKEN > 0 + case IFT_ISO88025: + ln->la_opaque = kmem_alloc(sizeof(struct token_rif), + KM_SLEEP); + break; +#endif /* NTOKEN > 0 */ + default: + break; + } +#undef _IFP + /* this is required for "ndp" command. - shin */ if (req == RTM_ADD) { /* @@ -1436,55 +1509,17 @@ nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) * initialized in rtrequest(), so rt_expire is 0. */ ln->ln_state = ND6_LLINFO_NOSTATE; - nd6_llinfo_settimer(ln, 0); + nd6_llinfo_settimer_locked(ln, 0); } RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); - rt->rt_flags |= RTF_LLINFO; - ln->ln_next = llinfo_nd6.ln_next; - llinfo_nd6.ln_next = ln; - ln->ln_prev = &llinfo_nd6; - ln->ln_next->ln_prev = ln; /* - * If we have too many cache entries, initiate immediate - * purging for some "less recently used" entries. Note that - * we cannot directly call nd6_free() here because it would - * cause re-entering rtable related routines triggering an LOR - * problem for FreeBSD. 
- */ - if (ip6_neighborgcthresh >= 0 && - nd6_inuse >= ip6_neighborgcthresh) { - int i; - - for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) { - struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev; - - /* Move this entry to the head */ - LN_DEQUEUE(ln_end); - LN_INSERTHEAD(ln_end); - - if (ND6_LLINFO_PERMANENT(ln_end)) - continue; - - if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE) - ln_end->ln_state = ND6_LLINFO_STALE; - else - ln_end->ln_state = ND6_LLINFO_PURGE; - nd6_llinfo_settimer(ln_end, 0); - } - } - - RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); - /* * check if rt_getkey(rt) is an address assigned * to the interface. */ - ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, - &satocsin6(rt_getkey(rt))->sin6_addr); - RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); if (ifa != NULL) { const void *mac; - nd6_llinfo_settimer(ln, -1); + nd6_llinfo_settimer_locked(ln, -1); ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if ((mac = nd6_ifptomac(ifp)) != NULL) { @@ -1516,7 +1551,7 @@ nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) } rt->rt_flags |= RTF_LOCAL; } else if (rt->rt_flags & RTF_ANNOUNCE) { - nd6_llinfo_settimer(ln, -1); + nd6_llinfo_settimer_locked(ln, -1); ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; @@ -1539,6 +1574,14 @@ nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) } } } + LLE_WUNLOCK(ln); + /* + * If we have too many cache entries, initiate immediate + * purging for some entries. 
+ */ + nd6_gc_neighbors(ln->lle_tbl); + ln = NULL; + break; case RTM_DELETE: @@ -1562,14 +1605,56 @@ nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) } } nd6_inuse--; - ln->ln_next->ln_prev = ln->ln_prev; - ln->ln_prev->ln_next = ln->ln_next; - ln->ln_prev = NULL; - nd6_llinfo_settimer(ln, -1); - rt->rt_llinfo = 0; + rt->rt_llinfo = NULL; rt->rt_flags &= ~RTF_LLINFO; + + /* Must be done before IF_AFDATA_WLOCK to avoid deadlock */ + callout_halt(&ln->la_timer, &ln->lle_lock); + /* XXX: LOR avoidance. We still hold a ref on lle. */ + LLE_RUNLOCK(ln); + + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(ln); + clear_llinfo_pqueue(ln); - Free(ln); + if (ln->la_opaque != NULL) { + switch (ifp->if_type) { +#if NTOKEN > 0 + case IFT_ISO88025: + kmem_free(ln->la_opaque, + sizeof(struct token_rif)); + break; +#endif /* NTOKEN > 0 */ + default: + break; + } + } + + if (ln->la_rt != NULL) { + /* + * Don't rtfree (may actually free objects) here. + * Leave it to rtrequest1. + */ + ln->la_rt->rt_refcnt--; + ln->la_rt = NULL; + } + /* Guard against race with other llentry_free(). 
*/ + if (ln->la_flags & LLE_LINKED) { + LLE_REMREF(ln); + llentry_free(ln); + } else { + LLE_FREE_LOCKED(ln); + } + + IF_AFDATA_WUNLOCK(ifp); + ln = NULL; + } + + if (ln != NULL) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); } } @@ -1853,7 +1938,7 @@ nd6_ioctl(u_long cmd, void *data, struct ifnet *ifp) } case SIOCGNBRINFO_IN6: { - struct llinfo_nd6 *ln; + struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ struct rtentry *rt; @@ -1868,7 +1953,7 @@ nd6_ioctl(u_long cmd, void *data, struct ifnet *ifp) break; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; rtfree(rt); if (ln == NULL) { error = EINVAL; @@ -1894,12 +1979,12 @@ nd6_ioctl(u_long cmd, void *data, struct ifnet *ifp) } void -nd6_llinfo_release_pkts(struct llinfo_nd6 *ln, struct ifnet *ifp, +nd6_llinfo_release_pkts(struct llentry *ln, struct ifnet *ifp, struct rtentry *rt) { struct mbuf *m_hold, *m_hold_next; - for (m_hold = ln->ln_hold, ln->ln_hold = NULL; + for (m_hold = ln->la_hold, ln->la_hold = NULL, ln->la_numheld = 0; m_hold != NULL; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; @@ -1930,7 +2015,7 @@ nd6_cache_lladdr( { struct nd_ifinfo *ndi = ND_IFINFO(ifp); struct rtentry *rt = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = NULL; int is_newentry; struct sockaddr_dl *sdl = NULL; int do_update; @@ -1978,11 +2063,13 @@ nd6_cache_lladdr( return; if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) { fail: - (void)nd6_free(rt, 0); + if (rt->rt_llinfo != NULL) + LLE_WLOCK((struct llentry *)rt->rt_llinfo); + nd6_free(rt, rt->rt_llinfo, 0); rtfree(rt); return; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; if (ln == NULL) goto fail; if (rt->rt_gateway == NULL) @@ -2280,7 +2367,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, { struct mbuf *m = m0; struct rtentry *rt = rt0; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = NULL; int error = 0; 
#define RTFREE_IF_NEEDED(_rt) \ @@ -2314,7 +2401,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, /* Look up the neighbor cache for the nexthop */ if (rt != NULL && (rt->rt_flags & RTF_LLINFO) != 0) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; else { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), @@ -2325,7 +2412,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, RTFREE_IF_NEEDED(rt); rt = nd6_lookup(&dst->sin6_addr, 1, ifp); if (rt != NULL) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; } } if (ln == NULL || rt == NULL) { @@ -2341,14 +2428,6 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, goto sendpkt; /* send anyway */ } - /* - * Move this entry to the head of the queue so that it is less likely - * for this entry to be a target of forced garbage collection (see - * nd6_rtrequest()). - */ - LN_DEQUEUE(ln); - LN_INSERTHEAD(ln); - /* We don't have to do link-layer address resolution on a p2p link. 
*/ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ln->ln_state < ND6_LLINFO_REACHABLE) { @@ -2540,7 +2619,7 @@ nd6_storelladdr(const struct ifnet *ifp, const struct rtentry *rt, } static void -clear_llinfo_pqueue(struct llinfo_nd6 *ln) +clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index 4d61c95..953667c 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -36,21 +36,6 @@ #include #include -struct llinfo_nd6 { - struct llinfo_nd6 *ln_next; - struct llinfo_nd6 *ln_prev; - struct rtentry *ln_rt; - struct mbuf *ln_hold; /* last packet until resolved/timeout */ - long ln_asked; /* number of queries already sent for this addr */ - u_long ln_expire; /* lifetime for NDP state transition */ - short ln_state; /* reachability state */ - short ln_router; /* 2^0: ND6 router bit */ - int ln_byhint; /* # of times we made it reachable by UL hint */ - - long ln_ntick; - struct callout ln_timer_ch; -}; - #define ND6_LLINFO_PURGE -3 #define ND6_LLINFO_NOSTATE -2 /* @@ -361,7 +346,6 @@ extern int nd6_mmaxtries; extern int nd6_useloopback; extern int nd6_maxnudhint; extern int nd6_gctimer; -extern struct llinfo_nd6 llinfo_nd6; extern struct nd_drhead nd_defrouter; extern struct nd_prhead nd_prefix; extern int nd6_debug; @@ -403,6 +387,8 @@ union nd_opts { #define nd_opts_last nd_opt_each.last #define nd_opts_done nd_opt_each.done +#include + /* XXX: need nd6_var.h?? 
*/ /* nd6.c */ void nd6_init(void); @@ -414,7 +400,8 @@ struct nd_opt_hdr *nd6_option(union nd_opts *); int nd6_options(union nd_opts *); struct rtentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *); void nd6_setmtu(struct ifnet *); -void nd6_llinfo_settimer(struct llinfo_nd6 *, long); +void nd6_llinfo_settimer(struct llentry *, long); +void nd6_llinfo_settimer_locked(struct llentry *, long); void nd6_timer(void *); void nd6_purge(struct ifnet *, struct in6_ifextra *); void nd6_nud_hint(struct rtentry *); @@ -430,7 +417,7 @@ int nd6_storelladdr(const struct ifnet *, const struct rtentry *, struct mbuf *, const struct sockaddr *, uint8_t *, size_t); int nd6_sysctl(int, void *, size_t *, void *, size_t); int nd6_need_cache(struct ifnet *); -void nd6_llinfo_release_pkts(struct llinfo_nd6 *, struct ifnet *, +void nd6_llinfo_release_pkts(struct llentry *, struct ifnet *, struct rtentry *); /* nd6_nbr.c */ diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 00a1cf2..93d0893 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -548,7 +548,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) char *lladdr = NULL; int lladdrlen = 0; struct ifaddr *ifa; - struct llinfo_nd6 *ln; + struct llentry *ln; struct rtentry *rt = NULL; struct sockaddr_dl *sdl; union nd_opts ndopts; @@ -652,7 +652,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ rt = nd6_lookup(&taddr6, 0, ifp); if ((rt == NULL) || - ((ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) || + ((ln = rt->rt_llinfo) == NULL) || ((sdl = satosdl(rt->rt_gateway)) == NULL)) goto freeit; @@ -676,7 +676,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) ln->ln_byhint = 0; if (!ND6_LLINFO_PERMANENT(ln)) { nd6_llinfo_settimer(ln, - (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); + (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz); } } else { ln->ln_state = ND6_LLINFO_STALE; diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 5498a58..9be31c6 100644 --- 
a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -109,12 +109,12 @@ static inline bool nd6_is_llinfo_probreach(struct nd_defrouter *dr) { struct rtentry *rt = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = NULL; rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp); if (rt == NULL) return false; - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; rtfree(rt); if (ln == NULL || !ND6_IS_LLINFO_PROBREACH(ln)) return false;