diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index dbdc10c..cdd21b6 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -228,6 +228,7 @@ extern void *_binary_splash_image_end; #include #include #include +#include #include @@ -565,6 +566,7 @@ main(void) */ s = splnet(); ifinit(); + lltableinit(); domaininit(true); if_attachdomain(); splx(s); diff --git a/sys/net/files.net b/sys/net/files.net index 436c9b1..de01365 100644 --- a/sys/net/files.net +++ b/sys/net/files.net @@ -22,6 +22,7 @@ file net/if_gif.c gif needs-flag file net/if_gre.c gre needs-flag file net/if_hippisubr.c hippi needs-flag file net/if_ieee1394subr.c ieee1394 +file net/if_llatbl.c ether file net/if_loop.c loop file net/if_media.c net file net/if_mpls.c ifmpls needs-flag diff --git a/sys/net/if.c b/sys/net/if.c index 1c32b5a..0c93043 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -615,6 +615,8 @@ if_initialize(ifnet_t *ifp) (void)pfil_run_hooks(if_pfil, (struct mbuf **)PFIL_IFNET_ATTACH, ifp, PFIL_IFNET); + IF_AFDATA_LOCK_INIT(ifp); + if_getindex(ifp); } diff --git a/sys/net/if.h b/sys/net/if.h index 324479e..44f740f 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -213,6 +213,7 @@ struct ifnet_lock; #include #include #include +#include struct ifnet_lock { kmutex_t il_lock; /* Protects the critical section. 
*/ @@ -244,6 +245,7 @@ TAILQ_HEAD(ifnet_head, ifnet); /* the actual queue head */ struct bridge_softc; struct bridge_iflist; struct callout; +struct krwlock; typedef struct ifnet { void *if_softc; /* lower-level data for this if */ @@ -345,6 +347,7 @@ typedef struct ifnet { #ifdef _KERNEL /* XXX kvm(3) */ struct callout *if_slowtimo_ch; #endif + struct krwlock *if_afdata_lock; } ifnet_t; #define if_mtu if_data.ifi_mtu @@ -434,6 +437,28 @@ typedef struct ifnet { "\23TSO6" \ "\24LRO" \ +#define IF_AFDATA_LOCK_INIT(ifp) \ + do {(ifp)->if_afdata_lock = rw_obj_alloc();} while (0) +/* if_afdata_lock comes from rw_obj_alloc(); IF_AFDATA_DESTROY releases it with rw_obj_free(). */ +#define IF_AFDATA_WLOCK(ifp) rw_enter((ifp)->if_afdata_lock, RW_WRITER) +#define IF_AFDATA_RLOCK(ifp) rw_enter((ifp)->if_afdata_lock, RW_READER) +#define IF_AFDATA_WUNLOCK(ifp) rw_exit((ifp)->if_afdata_lock) +#define IF_AFDATA_RUNLOCK(ifp) rw_exit((ifp)->if_afdata_lock) +#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) +#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) +#define IF_AFDATA_TRYLOCK(ifp) rw_tryenter((ifp)->if_afdata_lock, RW_WRITER) +#define IF_AFDATA_DESTROY(ifp) rw_obj_free((ifp)->if_afdata_lock) +/* Assertions on the held/unheld state of if_afdata_lock. */ +#define IF_AFDATA_LOCK_ASSERT(ifp) \ + KASSERT(rw_lock_held((ifp)->if_afdata_lock)) +#define IF_AFDATA_RLOCK_ASSERT(ifp) \ + KASSERT(rw_read_held((ifp)->if_afdata_lock)) +#define IF_AFDATA_WLOCK_ASSERT(ifp) \ + KASSERT(rw_write_held((ifp)->if_afdata_lock)) +#define IF_AFDATA_UNLOCK_ASSERT(ifp) \ + KASSERT(!rw_lock_held((ifp)->if_afdata_lock)) + + #define IFQ_LOCK(_ifq) if ((_ifq)->ifq_lock) mutex_enter((_ifq)->ifq_lock) #define IFQ_UNLOCK(_ifq) if ((_ifq)->ifq_lock) mutex_exit((_ifq)->ifq_lock) diff --git a/sys/net/if_arp.h b/sys/net/if_arp.h index 16fce2d..6174d72 100644 --- a/sys/net/if_arp.h +++ b/sys/net/if_arp.h @@ -127,4 +127,6 @@ struct arpreq { #define ARP_NSTATS 23 +void arp_stat_add(int, uint64_t); + #endif /* !_NET_IF_ARP_H_ */ diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 93497d1..367a9a25 100644 --- a/sys/net/if_ethersubr.c +++
b/sys/net/if_ethersubr.c @@ -243,7 +243,7 @@ ether_output(struct ifnet * const ifp0, struct mbuf * const m0, else if (m->m_flags & M_MCAST) ETHER_MAP_IP_MULTICAST(&satocsin(dst)->sin_addr, edst); else if (!arpresolve(ifp, rt, m, dst, edst)) - return (0); /* if not yet resolved */ + return 0; /* if not yet resolved */ /* If broadcasting on a simplex interface, loopback a copy */ if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX)) mcopy = m_copy(m, 0, (int)M_COPYALL); diff --git a/sys/net/if_llatbl.c b/sys/net/if_llatbl.c new file mode 100644 index 0000000..e6b60cf --- /dev/null +++ b/sys/net/if_llatbl.c @@ -0,0 +1,755 @@ +/* $NetBSD$ */ +/* + * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. + * Copyright (c) 2004-2008 Qing Li. All rights reserved. + * Copyright (c) 2008 Kip Macy. All rights reserved. + * Copyright (c) 2015 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include + +#ifdef _KERNEL_OPT +#include "opt_ddb.h" +#include "opt_inet.h" +#include "opt_inet6.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DDB +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +static SLIST_HEAD(, lltable) lltables; +krwlock_t lltable_rwlock; + +static void lltable_unlink(struct lltable *llt); +static void llentries_unlink(struct lltable *llt, struct llentries *head); + +static void htable_unlink_entry(struct llentry *lle); +static void htable_link_entry(struct lltable *llt, struct llentry *lle); +static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, + void *farg); + +/* + * Dump lle state for a specific address family. + */ +static int +lltable_dump_af(struct lltable *llt, struct sysctl_req *wr) +{ + int error; + + LLTABLE_LOCK_ASSERT(); + + if (llt->llt_ifp->if_flags & IFF_LOOPBACK) + return (0); + error = 0; + + IF_AFDATA_RLOCK(llt->llt_ifp); + error = lltable_foreach_lle(llt, + (llt_foreach_cb_t *)llt->llt_dump_entry, wr); + IF_AFDATA_RUNLOCK(llt->llt_ifp); + + return (error); +} + +/* + * Dump arp state for a specific address family. 
+ */ +int +lltable_sysctl_dumparp(int af, struct sysctl_req *wr) +{ + struct lltable *llt; + int error = 0; + + LLTABLE_RLOCK(); + SLIST_FOREACH(llt, &lltables, llt_link) { + if (llt->llt_af == af) { + error = lltable_dump_af(llt, wr); + if (error != 0) + goto done; + } + } +done: + LLTABLE_RUNLOCK(); + return (error); +} + +/* + * Common function helpers for chained hash table. + */ + +/* + * Runs specified callback for each entry in @llt. + * Caller does the locking. + * Stops at the first non-zero callback result and returns it. + */ +static int +htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg) +{ + struct llentry *lle, *next; + int i, error; + + error = 0; + + for (i = 0; i < llt->llt_hsize; i++) { + LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { + error = f(llt, lle, farg); + if (error != 0) + return (error); + } + } + + return (error); +} + +static void +htable_link_entry(struct lltable *llt, struct llentry *lle) +{ + struct llentries *lleh; + uint32_t hashidx; + + if ((lle->la_flags & LLE_LINKED) != 0) + return; + + IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp); + + hashidx = llt->llt_hash(lle, llt->llt_hsize); + lleh = &llt->lle_head[hashidx]; + + lle->lle_tbl = llt; + lle->lle_head = lleh; + lle->la_flags |= LLE_LINKED; + LIST_INSERT_HEAD(lleh, lle, lle_next); +} + +static void +htable_unlink_entry(struct llentry *lle) +{ + + if ((lle->la_flags & LLE_LINKED) != 0) { + IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp); + LIST_REMOVE(lle, lle_next); + lle->la_flags &= ~(LLE_VALID | LLE_LINKED); +#if 0 + lle->lle_tbl = NULL; + lle->lle_head = NULL; +#endif + } +} + +struct prefix_match_data { + const struct sockaddr *prefix; + const struct sockaddr *mask; + struct llentries dchain; + u_int flags; +}; + +static int +htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg) +{ + struct prefix_match_data *pmd; + + pmd = (struct prefix_match_data *)farg; + + if (llt->llt_match_prefix(pmd->prefix, pmd->mask, pmd->flags, lle)) { + LLE_WLOCK(lle); + LIST_INSERT_HEAD(&pmd->dchain, lle,
lle_chain); + } + + return (0); +} + +static void +htable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, + const struct sockaddr *mask, u_int flags) +{ + struct llentry *lle, *next; + struct prefix_match_data pmd; + + memset(&pmd, 0, sizeof(pmd)); + pmd.prefix = prefix; + pmd.mask = mask; + pmd.flags = flags; + LIST_INIT(&pmd.dchain); + + IF_AFDATA_WLOCK(llt->llt_ifp); + /* Push matching lles to chain */ + lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd); + + llentries_unlink(llt, &pmd.dchain); + IF_AFDATA_WUNLOCK(llt->llt_ifp); + + LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next) + llt->llt_free_entry(llt, lle); +} + +static void +htable_free_tbl(struct lltable *llt) +{ + + free(llt->lle_head, M_LLTABLE); + free(llt, M_LLTABLE); +} + +static void +llentries_unlink(struct lltable *llt, struct llentries *head) +{ + struct llentry *lle, *next; + + LIST_FOREACH_SAFE(lle, head, lle_chain, next) + llt->llt_unlink_entry(lle); +} + +/* + * Helper function used to drop all mbufs in hold queue. + * + * Returns the number of held packets, if any, that were dropped. + */ +size_t +lltable_drop_entry_queue(struct llentry *lle) +{ + size_t pkts_dropped; + struct mbuf *next; + + LLE_WLOCK_ASSERT(lle); + + pkts_dropped = 0; + while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) { + next = lle->la_hold->m_nextpkt; + m_freem(lle->la_hold); + lle->la_hold = next; + lle->la_numheld--; + pkts_dropped++; + } + + KASSERTMSG(lle->la_numheld == 0, + "la_numheld %d > 0, pkts_droped %zd", + lle->la_numheld, pkts_dropped); + + return (pkts_dropped); +} + +/* + * Deletes an address from the address table. + * This function is called by the timer functions + * such as arptimer() and nd6_llinfo_timer(), and + * the caller does the locking. + * + * Returns the number of held packets, if any, that were dropped. 
+ */ +size_t +llentry_free(struct llentry *lle) +{ + struct lltable *llt; + size_t pkts_dropped; + + LLE_WLOCK_ASSERT(lle); + + if ((lle->la_flags & LLE_LINKED) != 0) { + llt = lle->lle_tbl; + + IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp); + llt->llt_unlink_entry(lle); + } + + pkts_dropped = lltable_drop_entry_queue(lle); + + LLE_FREE_LOCKED(lle); + + return (pkts_dropped); +} + +/* + * (al)locate an llentry for address dst (equivalent to rtalloc for new-arp). + * + * If found the llentry * is returned referenced and unlocked. + */ +struct llentry * +llentry_alloc(struct ifnet *ifp, struct lltable *lt, + struct sockaddr_storage *dst) +{ + struct llentry *la; + + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst); + IF_AFDATA_RUNLOCK(ifp); + if ((la == NULL) && +#ifdef __FreeBSD__ + (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { +#else /* XXX */ + (ifp->if_flags & IFF_NOARP) == 0) { +#endif + IF_AFDATA_WLOCK(ifp); + la = lla_create(lt, 0, (struct sockaddr *)dst); + IF_AFDATA_WUNLOCK(ifp); + } + + if (la != NULL) { + LLE_ADDREF(la); + LLE_WUNLOCK(la); + } + + return (la); +} + +/* + * Free all entries from given table and free itself. + */ + +static int +lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg) +{ + struct llentries *dchain; + + dchain = (struct llentries *)farg; + + LLE_WLOCK(lle); + LIST_INSERT_HEAD(dchain, lle, lle_chain); + + return (0); +} + +/* + * Free all entries from given table and free itself. 
+ */ +void +lltable_free(struct lltable *llt) +{ + struct llentry *lle, *next; + struct llentries dchain; + + KASSERTMSG(llt != NULL, "llt is NULL"); + + lltable_unlink(llt); + + LIST_INIT(&dchain); + IF_AFDATA_WLOCK(llt->llt_ifp); + /* Push all lles to @dchain */ + lltable_foreach_lle(llt, lltable_free_cb, &dchain); + llentries_unlink(llt, &dchain); + IF_AFDATA_WUNLOCK(llt->llt_ifp); + + LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) { + if (callout_stop(&lle->la_timer)) + LLE_REMREF(lle); +#if __NetBSD__ + /* XXX should have callback? */ + if (lle->la_rt != NULL) + rtfree(lle->la_rt); +#endif + llentry_free(lle); + } + + llt->llt_free_tbl(llt); +} + +void +lltable_drain(int af) +{ + struct lltable *llt; + struct llentry *lle; + register int i; + + LLTABLE_RLOCK(); + SLIST_FOREACH(llt, &lltables, llt_link) { + if (llt->llt_af != af) + continue; + + for (i=0; i < llt->llt_hsize; i++) { + LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { + LLE_WLOCK(lle); + lltable_drop_entry_queue(lle); + LLE_WUNLOCK(lle); + } + } + } + LLTABLE_RUNLOCK(); +} + +void +lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask, + u_int flags) +{ + struct lltable *llt; + + LLTABLE_RLOCK(); + SLIST_FOREACH(llt, &lltables, llt_link) { + if (llt->llt_af != af) + continue; + + llt->llt_prefix_free(llt, prefix, mask, flags); + } + LLTABLE_RUNLOCK(); +} + +struct lltable * +lltable_allocate_htbl(uint32_t hsize) +{ + struct lltable *llt; + int i; + + llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO); + llt->llt_hsize = hsize; + llt->lle_head = malloc(sizeof(struct llentries) * hsize, + M_LLTABLE, M_WAITOK | M_ZERO); + + for (i = 0; i < llt->llt_hsize; i++) + LIST_INIT(&llt->lle_head[i]); + + /* Set some default callbacks */ + llt->llt_link_entry = htable_link_entry; + llt->llt_unlink_entry = htable_unlink_entry; + llt->llt_prefix_free = htable_prefix_free; + llt->llt_foreach_entry = htable_foreach_lle; + + llt->llt_free_tbl = htable_free_tbl; + + return 
(llt); +} + +/* + * Links lltable to global llt list. + */ +void +lltable_link(struct lltable *llt) +{ + + LLTABLE_WLOCK(); + SLIST_INSERT_HEAD(&lltables, llt, llt_link); + LLTABLE_WUNLOCK(); +} + +static void +lltable_unlink(struct lltable *llt) +{ + + LLTABLE_WLOCK(); + SLIST_REMOVE(&lltables, llt, lltable, llt_link); + LLTABLE_WUNLOCK(); + +} + +/* + * External methods used by lltable consumers + */ + +int +lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg) +{ + + return (llt->llt_foreach_entry(llt, f, farg)); +} + +void +lltable_link_entry(struct lltable *llt, struct llentry *lle) +{ + + llt->llt_link_entry(llt, lle); +} + +void +lltable_unlink_entry(struct lltable *llt, struct llentry *lle) +{ + + llt->llt_unlink_entry(lle); +} + +void +lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) +{ + struct lltable *llt; + + llt = lle->lle_tbl; + llt->llt_fill_sa_entry(lle, sa); +} + +struct ifnet * +lltable_get_ifp(const struct lltable *llt) +{ + + return (llt->llt_ifp); +} + +int +lltable_get_af(const struct lltable *llt) +{ + + return (llt->llt_af); +} + +/* + * Called in route_output when rtm_flags contains RTF_LLDATA. 
+ */ +int +lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) +{ + const struct sockaddr_dl *dl = satocsdl(info->rti_info[RTAX_GATEWAY]); + const struct sockaddr *dst = info->rti_info[RTAX_DST]; + struct ifnet *ifp; + struct lltable *llt; + struct llentry *lle; + u_int laflags; + int error; + + if (dl == NULL || dl->sdl_family != AF_LINK) return (EINVAL); /* request-supplied gateway: reject, do not assert */ + + ifp = if_byindex(dl->sdl_index); + if (ifp == NULL) { + log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n", + __func__, dl->sdl_index); + return EINVAL; + } + + /* XXX linked list may be too expensive */ + LLTABLE_RLOCK(); + SLIST_FOREACH(llt, &lltables, llt_link) { + if (llt->llt_af == dst->sa_family && + llt->llt_ifp == ifp) + break; + } + LLTABLE_RUNLOCK(); + if (llt == NULL) return (ESRCH); /* no table for this af/ifp pair */ + + error = 0; + + switch (rtm->rtm_type) { + case RTM_ADD: + /* Add static LLE */ + IF_AFDATA_WLOCK(ifp); + lle = lla_create(llt, 0, dst); + if (lle == NULL) { + IF_AFDATA_WUNLOCK(ifp); + return (ENOMEM); + } + + + memcpy(&lle->ll_addr, CLLADDR(dl), ifp->if_addrlen); + if ((rtm->rtm_flags & RTF_ANNOUNCE)) + lle->la_flags |= LLE_PUB; + lle->la_flags |= LLE_VALID; +#ifdef INET6 + /* + * ND6 + */ + if (dst->sa_family == AF_INET6) + lle->ln_state = ND6_LLINFO_REACHABLE; +#endif + /* + * NB: arp and ndp always set (RTF_STATIC | RTF_HOST) + */ + + if (rtm->rtm_rmx.rmx_expire == 0) { + lle->la_flags |= LLE_STATIC; + lle->la_expire = 0; + } else + lle->la_expire = rtm->rtm_rmx.rmx_expire; + laflags = lle->la_flags; + LLE_WUNLOCK(lle); + IF_AFDATA_WUNLOCK(ifp); +#ifdef INET + /* gratuitous ARP */ + if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) + arprequest(ifp, + &((const struct sockaddr_in *)dst)->sin_addr, + &((const struct sockaddr_in *)dst)->sin_addr, + CLLADDR(dl)); +#endif + + break; + + case RTM_DELETE: + IF_AFDATA_WLOCK(ifp); + error = lla_delete(llt, 0, dst); + IF_AFDATA_WUNLOCK(ifp); + return (error == 0 ?
0 : ENOENT); + + default: + error = EINVAL; + } + + return (error); +} + +void +lltableinit(void) +{ + + SLIST_INIT(&lltables); + rw_init(&lltable_rwlock); +} + +#ifdef __FreeBSD__ +#ifdef DDB +struct llentry_sa { + struct llentry base; + struct sockaddr l3_addr; +}; + +static void +llatbl_lle_show(struct llentry_sa *la) +{ + struct llentry *lle; + uint8_t octet[6]; + + lle = &la->base; + db_printf("lle=%p\n", lle); + db_printf(" lle_next=%p\n", lle->lle_next.le_next); + db_printf(" lle_lock=%p\n", &lle->lle_lock); + db_printf(" lle_tbl=%p\n", lle->lle_tbl); + db_printf(" lle_head=%p\n", lle->lle_head); + db_printf(" la_hold=%p\n", lle->la_hold); + db_printf(" la_numheld=%d\n", lle->la_numheld); + db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire); + db_printf(" la_flags=0x%04x\n", lle->la_flags); + db_printf(" la_asked=%u\n", lle->la_asked); + db_printf(" la_preempt=%u\n", lle->la_preempt); + db_printf(" ln_byhint=%u\n", lle->ln_byhint); + db_printf(" ln_state=%d\n", lle->ln_state); + db_printf(" ln_router=%u\n", lle->ln_router); + db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick); + db_printf(" lle_refcnt=%d\n", lle->lle_refcnt); + memcpy(octet, &lle->ll_addr.mac16, sizeof(octet)); + db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n", + octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]); + db_printf(" lle_timer=%p\n", &lle->lle_timer); + + switch (la->l3_addr.sa_family) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin; + char l3s[INET_ADDRSTRLEN]; + + sin = (struct sockaddr_in *)&la->l3_addr; + inet_ntoa_r(sin->sin_addr, l3s); + db_printf(" l3_addr=%s\n", l3s); + break; + } +#endif +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6; + char l3s[INET6_ADDRSTRLEN]; + + sin6 = (struct sockaddr_in6 *)&la->l3_addr; + ip6_sprintf(l3s, &sin6->sin6_addr); + db_printf(" l3_addr=%s\n", l3s); + break; + } +#endif + default: + db_printf(" l3_addr=N/A (af=%d)\n", la->l3_addr.sa_family); + break; + } +} + +DB_SHOW_COMMAND(llentry,
db_show_llentry) +{ + + if (!have_addr) { + db_printf("usage: show llentry \n"); + return; + } + + llatbl_lle_show((struct llentry_sa *)addr); +} + +static void +llatbl_llt_show(struct lltable *llt) +{ + int i; + struct llentry *lle; + + db_printf("llt=%p llt_af=%d llt_ifp=%p\n", + llt, llt->llt_af, llt->llt_ifp); + + for (i = 0; i < llt->llt_hsize; i++) { + LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { + + llatbl_lle_show((struct llentry_sa *)lle); + if (db_pager_quit) + return; + } + } +} + +DB_SHOW_COMMAND(lltable, db_show_lltable) +{ + + if (!have_addr) { + db_printf("usage: show lltable \n"); + return; + } + + llatbl_llt_show((struct lltable *)addr); +} + +DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables) +{ + VNET_ITERATOR_DECL(vnet_iter); + struct lltable *llt; + + VNET_FOREACH(vnet_iter) { + CURVNET_SET_QUIET(vnet_iter); +#ifdef VIMAGE + db_printf("vnet=%p\n", curvnet); +#endif + SLIST_FOREACH(llt, &lltables, llt_link) { + db_printf("llt=%p llt_af=%d llt_ifp=%p(%s)\n", + llt, llt->llt_af, llt->llt_ifp, + (llt->llt_ifp != NULL) ? + llt->llt_ifp->if_xname : "?"); + if (have_addr && addr != 0) /* verbose */ + llatbl_llt_show(llt); + if (db_pager_quit) { + CURVNET_RESTORE(); + return; + } + } + CURVNET_RESTORE(); + } +} +#endif /* DDB */ +#endif /* __FreeBSD__ */ diff --git a/sys/net/if_llatbl.h b/sys/net/if_llatbl.h new file mode 100644 index 0000000..b897daf --- /dev/null +++ b/sys/net/if_llatbl.h @@ -0,0 +1,280 @@ +/* $NetBSD$ */ +/* + * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. + * Copyright (c) 2004-2008 Qing Li. All rights reserved. + * Copyright (c) 2008 Kip Macy. All rights reserved. + * Copyright (c) 2015 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include + +#ifndef _NET_IF_LLATBL_H_ +#define _NET_IF_LLATBL_H_ + +#include +#include + +struct ifnet; +struct sysctl_req; +struct rt_msghdr; +struct rt_addrinfo; + +struct llentry; +LIST_HEAD(llentries, llentry); + +extern krwlock_t lltable_rwlock; +#define LLTABLE_RLOCK() rw_enter(&lltable_rwlock, RW_READER) +#define LLTABLE_RUNLOCK() rw_exit(&lltable_rwlock) +#define LLTABLE_WLOCK() rw_enter(&lltable_rwlock, RW_WRITER) +#define LLTABLE_WUNLOCK() rw_exit(&lltable_rwlock) +#define LLTABLE_LOCK_ASSERT() KASSERT(rw_lock_held(&lltable_rwlock)) + +/* + * Code referencing llentry must at least hold + * a shared lock + */ +struct llentry { + LIST_ENTRY(llentry) lle_next; + union { + struct in_addr addr4; + struct in6_addr addr6; + } r_l3addr; + union { + uint64_t mac_aligned; + uint16_t mac16[3]; + uint8_t mac8[20]; /* IB needs 20 bytes. 
*/ + } ll_addr; + uint32_t spare0; + uint64_t spare1; + + struct lltable *lle_tbl; + struct llentries *lle_head; + void (*lle_free)(struct llentry *); + struct mbuf *la_hold; + int la_numheld; /* # of packets currently held */ + time_t la_expire; + uint16_t la_flags; + uint16_t la_asked; + uint16_t la_preempt; + uint16_t ln_byhint; + int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */ + uint16_t ln_router; + time_t ln_ntick; + int lle_refcnt; + + LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */ + struct callout lle_timer; + krwlock_t lle_lock; + +#ifdef __NetBSD__ +#define la_timer lle_timer + struct rtentry *la_rt; + void *la_opaque; /* For tokenring */ +#endif +}; + + +#if 0 +#define LLE_LOCK_TRACE(n) aprint_normal("%s: " #n " line %d\n", __func__, __LINE__) +#else +#define LLE_LOCK_TRACE(n) +#endif + +#define LLE_WLOCK(lle) do { \ + LLE_LOCK_TRACE(WL); \ + rw_enter(&(lle)->lle_lock, RW_WRITER); \ + } while (0) +#define LLE_RLOCK(lle) do { \ + LLE_LOCK_TRACE(RL); \ + rw_enter(&(lle)->lle_lock, RW_READER); \ + } while (0) +#define LLE_WUNLOCK(lle) do { \ + LLE_LOCK_TRACE(WU); \ + rw_exit(&(lle)->lle_lock); \ + } while (0) +#define LLE_RUNLOCK(lle) do { \ + LLE_LOCK_TRACE(RU); \ + rw_exit(&(lle)->lle_lock); \ + } while (0) +#define LLE_DOWNGRADE(lle) rw_downgrade(&(lle)->lle_lock) +#define LLE_TRY_UPGRADE(lle) rw_tryupgrade(&(lle)->lle_lock) +#ifdef __FreeBSD__ +#define LLE_LOCK_INIT(lle) rw_init_flags(&(lle)->lle_lock, "lle", RW_DUPOK) +#else /* XXX */ +#define LLE_LOCK_INIT(lle) rw_init(&(lle)->lle_lock) +#endif +#define LLE_LOCK_DESTROY(lle) rw_destroy(&(lle)->lle_lock) +#define LLE_WLOCK_ASSERT(lle) KASSERT(rw_write_held(&(lle)->lle_lock)) + +#define LLE_IS_VALID(lle) (((lle) != NULL) && ((lle) != (void *)-1)) + +#define LLE_ADDREF(lle) do { \ + LLE_WLOCK_ASSERT(lle); \ + KASSERTMSG((lle)->lle_refcnt >= 0, \ + "negative refcnt %d on lle %p", \ + (lle)->lle_refcnt, (lle)); \ + (lle)->lle_refcnt++; \ +} while (0) + +#define LLE_REMREF(lle) do { \ 
+ LLE_WLOCK_ASSERT(lle); \ + KASSERTMSG((lle)->lle_refcnt > 0, \ + "bogus refcnt %d on lle %p", \ + (lle)->lle_refcnt, (lle)); \ + (lle)->lle_refcnt--; \ +} while (0) + +#define LLE_FREE_LOCKED(lle) do { \ + if ((lle)->lle_refcnt == 1) \ + (lle)->lle_free(lle); \ + else { \ + LLE_REMREF(lle); \ + LLE_WUNLOCK(lle); \ + } \ + /* guard against invalid refs */ \ + (lle) = NULL; \ +} while (0) + +#define LLE_FREE(lle) do { \ + LLE_WLOCK(lle); \ + LLE_FREE_LOCKED(lle); \ +} while (0) + + +typedef struct llentry *(llt_lookup_t)(struct lltable *, u_int flags, + const struct sockaddr *l3addr); +typedef struct llentry *(llt_create_t)(struct lltable *, u_int flags, + const struct sockaddr *l3addr); +typedef int (llt_delete_t)(struct lltable *, u_int flags, + const struct sockaddr *l3addr); +typedef void (llt_prefix_free_t)(struct lltable *, + const struct sockaddr *prefix, const struct sockaddr *mask, u_int flags); +typedef int (llt_dump_entry_t)(struct lltable *, struct llentry *, + struct sysctl_req *); +typedef uint32_t (llt_hash_t)(const struct llentry *, uint32_t); +typedef int (llt_match_prefix_t)(const struct sockaddr *, + const struct sockaddr *, u_int, struct llentry *); +typedef void (llt_free_entry_t)(struct lltable *, struct llentry *); +typedef void (llt_fill_sa_entry_t)(const struct llentry *, struct sockaddr *); +typedef void (llt_free_tbl_t)(struct lltable *); +typedef void (llt_link_entry_t)(struct lltable *, struct llentry *); +typedef void (llt_unlink_entry_t)(struct llentry *); + +typedef int (llt_foreach_cb_t)(struct lltable *, struct llentry *, void *); +typedef int (llt_foreach_entry_t)(struct lltable *, llt_foreach_cb_t *, void *); + +struct lltable { + SLIST_ENTRY(lltable) llt_link; + int llt_af; + int llt_hsize; + struct llentries *lle_head; + struct ifnet *llt_ifp; + + llt_lookup_t *llt_lookup; + llt_create_t *llt_create; + llt_delete_t *llt_delete; + llt_prefix_free_t *llt_prefix_free; + llt_dump_entry_t *llt_dump_entry; + llt_hash_t *llt_hash; + 
llt_match_prefix_t *llt_match_prefix; + llt_free_entry_t *llt_free_entry; + llt_foreach_entry_t *llt_foreach_entry; + llt_link_entry_t *llt_link_entry; + llt_unlink_entry_t *llt_unlink_entry; + llt_fill_sa_entry_t *llt_fill_sa_entry; + llt_free_tbl_t *llt_free_tbl; +}; + +MALLOC_DECLARE(M_LLTABLE); + +/* + * LLentry flags + */ +#define LLE_DELETED 0x0001 /* entry must be deleted */ +#define LLE_STATIC 0x0002 /* entry is static */ +#define LLE_IFADDR 0x0004 /* entry is interface addr */ +#define LLE_VALID 0x0008 /* ll_addr is valid */ +#define LLE_PUB 0x0020 /* publish entry ??? */ +#define LLE_LINKED 0x0040 /* linked to lookup structure */ +/* LLE request flags */ +#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */ + +#define LLATBL_HASH(key, mask) \ + (((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask) + +void lltableinit(void); + +struct lltable *lltable_allocate_htbl(uint32_t hsize); +void lltable_free(struct lltable *); +void lltable_link(struct lltable *llt); +void lltable_prefix_free(int, struct sockaddr *, + struct sockaddr *, u_int); +void lltable_drain(int); +int lltable_sysctl_dumparp(int, struct sysctl_req *); + +size_t llentry_free(struct llentry *); +struct llentry *llentry_alloc(struct ifnet *, struct lltable *, + struct sockaddr_storage *); + +/* helper functions */ +size_t lltable_drop_entry_queue(struct llentry *); + +struct llentry *lltable_create_lle(struct lltable *llt, u_int flags, + const void *paddr); +void lltable_link_entry(struct lltable *llt, struct llentry *lle); +void lltable_unlink_entry(struct lltable *llt, struct llentry *lle); +void lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa); +struct ifnet *lltable_get_ifp(const struct lltable *llt); +int lltable_get_af(const struct lltable *llt); + +int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, + void *farg); +/* + * Generic link layer address lookup function. 
+ */ +static __inline struct llentry * +lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + + return (llt->llt_lookup(llt, flags, l3addr)); +} + +static __inline struct llentry * +lla_create(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + + return (llt->llt_create(llt, flags, l3addr)); +} + +static __inline int +lla_delete(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + + return (llt->llt_delete(llt, flags, l3addr)); +} + + +int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *); + +#endif /* _NET_IF_LLATBL_H_ */ diff --git a/sys/net/if_tokensubr.c b/sys/net/if_tokensubr.c index 412c92b..717cbd6 100644 --- a/sys/net/if_tokensubr.c +++ b/sys/net/if_tokensubr.c @@ -114,11 +114,12 @@ __KERNEL_RCSID(0, "$NetBSD: if_tokensubr.c,v 1.70 2015/08/24 22:21:26 pooka Exp #include #include -#include -#include -#include #include +#include +#include #include +#include +#include #include @@ -217,9 +218,13 @@ token_output(struct ifnet *ifp0, struct mbuf *m0, const struct sockaddr *dst, * XXX m->m_flags & M_MCAST IEEE802_MAP_IP_MULTICAST ?? */ else { + struct llentry *la; if (!arpresolve(ifp, rt, m, dst, edst)) return (0); /* if not yet resolved */ - rif = TOKEN_RIF((struct llinfo_arp *) rt->rt_llinfo); + la = rt->rt_llinfo; + KASSERT(la != NULL); + KASSERT(la->la_opaque != NULL); + rif = la->la_opaque; riflen = (ntohs(rif->tr_rcf) & TOKEN_RCF_LEN_MASK) >> 8; } /* If broadcasting on a simplex interface, loopback a copy. 
*/ diff --git a/sys/net/route.h b/sys/net/route.h index 2ae0501..d49c59c 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -148,6 +148,7 @@ struct ortentry { #define RTF_CLONING 0x100 /* generate new routes on use */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* generated by ARP or NDP */ +#define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_CLONED 0x2000 /* this is a cloned route */ diff --git a/sys/netinet/if_arp.c b/sys/netinet/if_arp.c index 859c146..7c30f2a 100644 --- a/sys/netinet/if_arp.c +++ b/sys/netinet/if_arp.c @@ -98,6 +98,7 @@ __KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.173 2015/08/24 22:21:26 pooka Exp $"); #include #include #include +#include #include #include @@ -105,6 +106,7 @@ __KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.173 2015/08/24 22:21:26 pooka Exp $"); #include #include #include +#include #include #include #include @@ -139,10 +141,9 @@ __KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.173 2015/08/24 22:21:26 pooka Exp $"); #define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL /* timer values */ -static int arpt_prune = (5*60*1); /* walk list every 5 minutes */ static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */ static int arpt_down = 20; /* once declared down, don't send for 20 secs */ -static int arpt_refresh = (5*60); /* time left before refreshing */ +static int arp_maxhold = 1; /* number of packets to hold per ARP entry */ #define rt_expire rt_rmx.rmx_expire #define rt_pksent rt_rmx.rmx_pksent @@ -158,12 +159,10 @@ static void arp_init(void); static struct sockaddr *arp_setgate(struct rtentry *, struct sockaddr *, const struct sockaddr *); -static void arptfree(struct llinfo_arp *); +static void arptfree(struct llentry *); static void arptimer(void *); -static struct llinfo_arp *arplookup1(struct mbuf *, const struct in_addr *, - int, int, struct 
rtentry *); -static struct llinfo_arp *arplookup(struct mbuf *, const struct in_addr *, - int, int); +static struct llentry *arplookup(struct ifnet *, struct mbuf *, + const struct in_addr *, int, int, int, struct rtentry *); static void in_arpinput(struct mbuf *); static void in_revarpinput(struct mbuf *); static void revarprequest(struct ifnet *); @@ -175,7 +174,6 @@ static void arp_dad_start(struct ifaddr *); static void arp_dad_stop(struct ifaddr *); static void arp_dad_duplicated(struct ifaddr *); -LIST_HEAD(llinfo_arpq, llinfo_arp) llinfo_arp; struct ifqueue arpintrq = { .ifq_head = NULL, .ifq_tail = NULL, @@ -186,7 +184,6 @@ struct ifqueue arpintrq = { static int arp_inuse, arp_allocated; static int arp_maxtries = 5; static int useloopback = 1; /* use loopback interface for local traffic */ -static int arpinit_done = 0; static percpu_t *arpstat_percpu; @@ -196,8 +193,6 @@ static percpu_t *arpstat_percpu; #define ARP_STATINC(x) _NET_STATINC(arpstat_percpu, x) #define ARP_STATADD(x, v) _NET_STATADD(arpstat_percpu, x, v) -struct callout arptimer_ch; - /* revarp state */ static struct in_addr myip, srv_ip; static int myip_initialized = 0; @@ -287,75 +282,6 @@ struct domain arpdomain = { .dom_protoswNPROTOSW = &arpsw[__arraycount(arpsw)], }; -/* - * ARP table locking. - * - * to prevent lossage vs. the arp_drain routine (which may be called at - * any time, including in a device driver context), we do two things: - * - * 1) manipulation of la->la_hold is done at splnet() (for all of - * about two instructions). - * - * 2) manipulation of the arp table's linked list is done under the - * protection of the ARP_LOCK; if arp_drain() or arptimer is called - * while the arp table is locked, we punt and try again later. - */ - -static int arp_locked; -static inline int arp_lock_try(int); -static inline void arp_unlock(void); - -static inline int -arp_lock_try(int recurse) -{ - int s; - - /* - * Use splvm() -- we're blocking things that would cause - * mbuf allocation. 
- */ - s = splvm(); - if (!recurse && arp_locked) { - splx(s); - return 0; - } - arp_locked++; - splx(s); - return 1; -} - -static inline void -arp_unlock(void) -{ - int s; - - s = splvm(); - arp_locked--; - splx(s); -} - -#ifdef DIAGNOSTIC -#define ARP_LOCK(recurse) \ -do { \ - if (arp_lock_try(recurse) == 0) { \ - printf("%s:%d: arp already locked\n", __FILE__, __LINE__); \ - panic("arp_lock"); \ - } \ -} while (/*CONSTCOND*/ 0) -#define ARP_LOCK_CHECK() \ -do { \ - if (arp_locked == 0) { \ - printf("%s:%d: arp lock not held\n", __FILE__, __LINE__); \ - panic("arp lock check"); \ - } \ -} while (/*CONSTCOND*/ 0) -#else -#define ARP_LOCK(x) (void) arp_lock_try(x) -#define ARP_LOCK_CHECK() /* nothing */ -#endif - -#define ARP_UNLOCK() arp_unlock() - static void sysctl_net_inet_arp_setup(struct sysctllog **); void @@ -380,74 +306,71 @@ arp_drainstub(void) void arp_drain(void) { - struct llinfo_arp *la; - int count = 0; - struct mbuf *mold; - KERNEL_LOCK(1, NULL); - - if (arp_lock_try(0) == 0) { - KERNEL_UNLOCK_ONE(NULL); - return; - } - - LIST_FOREACH(la, &llinfo_arp, la_list) { - mold = la->la_hold; - la->la_hold = NULL; - - if (mold) { - m_freem(mold); - count++; - } - } - ARP_UNLOCK(); - ARP_STATADD(ARP_STAT_DFRDROPPED, count); - KERNEL_UNLOCK_ONE(NULL); + lltable_drain(AF_INET); } - -/* - * Timeout routine. Age arp_tab entries periodically. - */ -/* ARGSUSED */ static void arptimer(void *arg) { - struct llinfo_arp *la, *nla; + struct llentry *lle = arg; + struct ifnet *ifp; mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); - if (arp_lock_try(0) == 0) { - /* get it later.. */ - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); - return; + if (lle == NULL) + goto out; + + if (lle->la_flags & LLE_STATIC) + goto out; + + LLE_WLOCK(lle); + if (callout_pending(&lle->la_timer)) { + /* + * Here we are a bit odd here in the treatment of + * active/pending. If the pending bit is set, it got + * rescheduled before I ran. 
The active + * bit we ignore, since if it was stopped + * in ll_tablefree() and was currently running + * it would have return 0 so the code would + * not have deleted it since the callout could + * not be stopped so we want to go through + * with the delete here now. If the callout + * was restarted, the pending bit will be back on and + * we just want to bail since the callout_reset would + * return 1 and our reference would have been removed + * by arpresolve() below. + */ + LLE_WUNLOCK(lle); + goto out; } + ifp = lle->lle_tbl->llt_ifp; + + callout_stop(&lle->la_timer); + + /* XXX: LOR avoidance. We still have ref on lle. */ + LLE_WUNLOCK(lle); + + /* We have to call this w/o lock */ + arptfree(lle); + + IF_AFDATA_LOCK(ifp); + LLE_WLOCK(lle); + + /* Guard against race with other llentry_free(). */ + if (lle->la_flags & LLE_LINKED) { + size_t pkts_dropped; - callout_reset(&arptimer_ch, arpt_prune * hz, arptimer, NULL); - LIST_FOREACH_SAFE(la, &llinfo_arp, la_list, nla) { - struct rtentry *rt = la->la_rt; - - if (rt->rt_expire == 0) - continue; - if ((rt->rt_expire - time_uptime) < arpt_refresh && - rt->rt_pksent > (time_uptime - arpt_keep)) { - /* - * If the entry has been used during since last - * refresh, try to renew it before deleting. 
- */ - arprequest(rt->rt_ifp, - &satocsin(rt->rt_ifa->ifa_addr)->sin_addr, - &satocsin(rt_getkey(rt))->sin_addr, - CLLADDR(rt->rt_ifp->if_sadl)); - } else if (rt->rt_expire <= time_uptime) - arptfree(la); /* timer has expired; clear */ + LLE_REMREF(lle); + pkts_dropped = llentry_free(lle); + ARP_STATADD(ARP_STAT_DFRDROPPED, pkts_dropped); + } else { + LLE_FREE_LOCKED(lle); } - ARP_UNLOCK(); + IF_AFDATA_UNLOCK(ifp); - KERNEL_UNLOCK_ONE(NULL); +out: mutex_exit(softnet_lock); } @@ -498,19 +421,11 @@ void arp_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) { struct sockaddr *gate = rt->rt_gateway; - struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo; - size_t allocsize; - struct mbuf *mold; - int s; + struct llentry *la = NULL; struct in_ifaddr *ia; struct ifaddr *ifa; struct ifnet *ifp = rt->rt_ifp; - - if (!arpinit_done) { - arpinit_done = 1; - callout_init(&arptimer_ch, CALLOUT_MPSAFE); - callout_reset(&arptimer_ch, hz, arptimer, NULL); - } + int flags = 0; if (req == RTM_LLINFO_UPD) { struct in_addr *in; @@ -565,7 +480,9 @@ arp_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) return; } - ARP_LOCK(1); /* we may already be locked here. */ + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), flags, rt_getkey(rt)); + IF_AFDATA_RUNLOCK(ifp); switch (req) { case RTM_SETGATE: @@ -660,26 +577,38 @@ arp_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) * Case 2: This route may come from cloning, or a manual route * add with a LL address. 
*/ + flags = LLE_EXCLUSIVE; + if ((rt->rt_flags & RTF_CLONED) == 0) + flags |= LLE_IFADDR; + + IF_AFDATA_WLOCK(ifp); + la = lla_create(LLTABLE(ifp), flags, rt_getkey(rt)); + IF_AFDATA_WUNLOCK(ifp); + + if (la == NULL) { + log(LOG_DEBUG, "%s: lla_create failed\n", + __func__); + rt->rt_llinfo = NULL; + break; + } + rt->rt_llinfo = la; switch (ifp->if_type) { #if NTOKEN > 0 case IFT_ISO88025: - allocsize = sizeof(*la) + sizeof(struct token_rif); + la->la_opaque = kmem_alloc(sizeof(struct token_rif), + KM_SLEEP); break; #endif /* NTOKEN > 0 */ default: - allocsize = sizeof(*la); - } - R_Malloc(la, struct llinfo_arp *, allocsize); - rt->rt_llinfo = (void *)la; - if (la == NULL) { - log(LOG_DEBUG, "arp_rtrequest: malloc failed\n"); break; } - arp_inuse++, arp_allocated++; - memset(la, 0, allocsize); la->la_rt = rt; + rt->rt_refcnt++; rt->rt_flags |= RTF_LLINFO; - LIST_INSERT_HEAD(&llinfo_arp, la, la_list); + arp_inuse++, arp_allocated++; + + LLE_WUNLOCK(la); + la = NULL; INADDR_TO_IA(satocsin(rt_getkey(rt))->sin_addr, ia); while (ia && ia->ia_ifp != ifp) @@ -726,21 +655,54 @@ arp_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) if (la == NULL) break; arp_inuse--; - LIST_REMOVE(la, la_list); rt->rt_llinfo = NULL; rt->rt_flags &= ~RTF_LLINFO; - s = splnet(); - mold = la->la_hold; - la->la_hold = NULL; - splx(s); + LLE_RUNLOCK(la); - if (mold) - m_freem(mold); + flags |= LLE_EXCLUSIVE; + IF_AFDATA_WLOCK(ifp); - Free((void *)la); + la = lla_lookup(LLTABLE(ifp), flags, rt_getkey(rt)); + /* This shouldn't happen */ + if (la == NULL) { + IF_AFDATA_WUNLOCK(ifp); + break; + } + + if (la->la_opaque != NULL) { + switch (ifp->if_type) { +#if NTOKEN > 0 + case IFT_ISO88025: + kmem_free(la->la_opaque, + sizeof(struct token_rif)); + break; +#endif /* NTOKEN > 0 */ + default: + break; + } + } + + if (la->la_rt != NULL) { + /* + * Don't rtfree (may actually free objects) here. + * Leave it to rtrequest1. 
+ */ + la->la_rt->rt_refcnt--; + la->la_rt = NULL; + } + llentry_free(la); + + IF_AFDATA_WUNLOCK(ifp); + la = NULL; + } + + if (la != NULL) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(la); + else + LLE_RUNLOCK(la); } - ARP_UNLOCK(); } /* @@ -759,6 +721,10 @@ arprequest(struct ifnet *ifp, struct sockaddr sa; uint64_t *arps; + KASSERT(sip != NULL); + KASSERT(tip != NULL); + KASSERT(enaddr != NULL); + if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) return; MCLAIM(m, &arpdomain.dom_mowner); @@ -816,12 +782,15 @@ int arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, const struct sockaddr *dst, u_char *desten) { - struct llinfo_arp *la; + struct llentry *la; const struct sockaddr_dl *sdl; - struct mbuf *mold; - int s; + int renew; + int flags = 0; + int error; + bool create; - if ((la = arplookup1(m, &satocsin(dst)->sin_addr, 1, 0, rt)) != NULL) + la = arplookup(ifp, m, &satocsin(dst)->sin_addr, 1, 0, 0, rt); + if (la != NULL) rt = la->la_rt; if (la == NULL || rt == NULL) { @@ -830,6 +799,8 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, "arpresolve: can't allocate llinfo on %s for %s\n", ifp->if_xname, in_fmtaddr(satocsin(dst)->sin_addr)); m_freem(m); + if (la != NULL) + LLE_RUNLOCK(la); return 0; } sdl = satocsdl(rt->rt_gateway); @@ -842,24 +813,9 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, memcpy(desten, CLLADDR(sdl), min(sdl->sdl_alen, ifp->if_addrlen)); rt->rt_pksent = time_uptime; /* Time for last pkt sent */ + LLE_RUNLOCK(la); return 1; } - /* - * There is an arptab entry, but no ethernet address - * response yet. Replace the held mbuf with this - * latest one. - */ - - ARP_STATINC(ARP_STAT_DFRTOTAL); - s = splnet(); - mold = la->la_hold; - la->la_hold = m; - splx(s); - - if (mold) { - ARP_STATINC(ARP_STAT_DFRDROPPED); - m_freem(mold); - } /* * Re-send the ARP request when appropriate. 
@@ -872,27 +828,158 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, rt->rt_expire = time_uptime; } #endif - if (rt->rt_expire) { - rt->rt_flags &= ~RTF_REJECT; - if (la->la_asked == 0 || rt->rt_expire != time_uptime) { - rt->rt_expire = time_uptime; - if (la->la_asked++ < arp_maxtries) { - arprequest(ifp, - &satocsin(rt->rt_ifa->ifa_addr)->sin_addr, - &satocsin(dst)->sin_addr, + +retry: + create = false; + if (la == NULL) { + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), flags, dst); + IF_AFDATA_RUNLOCK(ifp); + } + + if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0) +#ifdef __FreeBSD__ + && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) { +#else + && ((ifp->if_flags & IFF_NOARP) == 0)) { +#endif + create = true; + flags |= LLE_EXCLUSIVE; + IF_AFDATA_WLOCK(ifp); + la = lla_create(LLTABLE(ifp), flags, dst); + IF_AFDATA_WUNLOCK(ifp); + } + + if (la == NULL) { + if (create) { + log(LOG_DEBUG, + "%s: failed to create llentry for %s on %s\n", + __func__, inet_ntoa(satocsin(dst)->sin_addr), + ifp->if_xname); + } + m_freem(m); + return 0; + } + + if ((la->la_flags & LLE_VALID) && + ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { + memcpy(desten, CLLADDR(sdl), + min(sdl->sdl_alen, ifp->if_addrlen)); + renew = 0; + /* + * If entry has an expiry time and it is approaching, + * see if we need to send an ARP request within this + * arpt_down interval. + */ + if (!(la->la_flags & LLE_STATIC) && + time_uptime + la->la_preempt > la->la_expire) { + renew = 1; + la->la_preempt--; + } + + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(la); + else + LLE_RUNLOCK(la); + + if (renew == 1) { + const u_int8_t *enaddr = #if NCARP > 0 - (rt->rt_ifp->if_type == IFT_CARP) ? - CLLADDR(rt->rt_ifp->if_sadl): + (rt->rt_ifp->if_type == IFT_CARP) ? 
+ CLLADDR(rt->rt_ifp->if_sadl): #endif - CLLADDR(ifp->if_sadl)); - } else { - rt->rt_flags |= RTF_REJECT; - rt->rt_expire += arpt_down; - la->la_asked = 0; + CLLADDR(ifp->if_sadl); + arprequest(ifp, &satocsin(rt->rt_ifa->ifa_addr)->sin_addr, + &satocsin(dst)->sin_addr, enaddr); + } + + return 1; + } + + if (la->la_flags & LLE_STATIC) { /* should not happen! */ + log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n", + inet_ntoa(satocsin(dst)->sin_addr)); + m_freem(m); + error = EINVAL; + goto done; + } + + renew = (la->la_asked == 0 || la->la_expire != time_uptime); + if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(la); + la = NULL; + goto retry; + } + /* + * There is an arptab entry, but no ethernet address + * response yet. Add the mbuf to the list, dropping + * the oldest packet if we have exceeded the system + * setting. + */ + if (m != NULL) { + LLE_WLOCK_ASSERT(la); + if (la->la_numheld >= arp_maxhold) { + if (la->la_hold != NULL) { + struct mbuf *next = la->la_hold->m_nextpkt; + m_freem(la->la_hold); + la->la_hold = next; + la->la_numheld--; + ARP_STATINC(ARP_STAT_DFRDROPPED); } } + if (la->la_hold != NULL) { + struct mbuf *curr = la->la_hold; + while (curr->m_nextpkt != NULL) + curr = curr->m_nextpkt; + curr->m_nextpkt = m; + } else + la->la_hold = m; + la->la_numheld++; + if (renew == 0 && (flags & LLE_EXCLUSIVE)) { + flags &= ~LLE_EXCLUSIVE; + LLE_DOWNGRADE(la); + } + } - return 0; + /* + * Return EWOULDBLOCK if we have tried less than arp_maxtries. It + * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH + * if we have already sent arp_maxtries ARP requests. Retransmit the + * ARP request, but not faster than one request per second. + */ + if (la->la_asked < arp_maxtries) + error = EWOULDBLOCK; /* First request. */ + else + error = (rt->rt_flags & RTF_GATEWAY) ? 
+ EHOSTUNREACH : EHOSTDOWN; + + if (renew) { + const u_int8_t *enaddr = +#if NCARP > 0 + (rt->rt_ifp->if_type == IFT_CARP) ? + CLLADDR(rt->rt_ifp->if_sadl): +#endif + CLLADDR(ifp->if_sadl); + LLE_ADDREF(la); + la->la_expire = time_uptime; + callout_reset(&la->la_timer, hz * arpt_down, + arptimer, la); + la->la_asked++; + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(la); + else + LLE_RUNLOCK(la); + arprequest(ifp, &satocsin(rt->rt_ifa->ifa_addr)->sin_addr, + &satocsin(dst)->sin_addr, enaddr); + return error == 0; + } +done: + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(la); + else + LLE_RUNLOCK(la); + return error == 0; } /* @@ -976,8 +1063,8 @@ in_arpinput(struct mbuf *m) { struct arphdr *ah; struct ifnet *ifp = m->m_pkthdr.rcvif; - struct llinfo_arp *la = NULL; - struct rtentry *rt; + struct llentry *la = NULL; + struct rtentry *rt = NULL; struct in_ifaddr *ia; #if NBRIDGE > 0 struct in_ifaddr *bridge_ia = NULL; @@ -985,13 +1072,11 @@ in_arpinput(struct mbuf *m) #if NCARP > 0 u_int32_t count = 0, index = 0; #endif - struct sockaddr_dl *sdl; + struct sockaddr_dl *sdl = NULL; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; int op; - struct mbuf *mold; void *tha; - int s; uint64_t *arps; if (__predict_false(m_makewritable(&m, 0, m->m_pkthdr.len, M_DONTWAIT))) @@ -1142,8 +1227,14 @@ in_arpinput(struct mbuf *m) itaddr = myaddr; goto reply; } - la = arplookup(m, &isaddr, in_hosteq(itaddr, myaddr), 0); - if (la != NULL && (rt = la->la_rt) && (sdl = satosdl(rt->rt_gateway))) { + + la = arplookup(ifp, m, &isaddr, in_hosteq(itaddr, myaddr), 0, 1, NULL); + if (la != NULL) { + rt = la->la_rt; + if (rt != NULL) + sdl = satosdl(rt->rt_gateway); + } + if (sdl != NULL) { if (sdl->sdl_alen && memcmp(ar_sha(ah), CLLADDR(sdl), sdl->sdl_alen)) { if (rt->rt_flags & RTF_STATIC) { @@ -1226,22 +1317,43 @@ in_arpinput(struct mbuf *m) #endif /* NTOKEN > 0 */ (void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, ar_sha(ah), ah->ar_hln); - if (rt->rt_expire) + if (rt->rt_expire) { rt->rt_expire = 
time_uptime + arpt_keep; + + KASSERT((la->la_flags & LLE_STATIC) == 0); + LLE_ADDREF(la); + callout_reset(&la->la_timer, hz * arpt_keep, arptimer, la); + } rt->rt_flags &= ~RTF_REJECT; la->la_asked = 0; - s = splnet(); - mold = la->la_hold; - la->la_hold = NULL; - splx(s); + if (la->la_hold != NULL) { + int n = la->la_numheld; + struct mbuf *m_hold, *m_hold_next; - if (mold) { - ARP_STATINC(ARP_STAT_DFRSENT); - (*ifp->if_output)(ifp, mold, rt_getkey(rt), rt); - } + m_hold = la->la_hold; + la->la_hold = NULL; + la->la_numheld = 0; + /* + * We have to unlock here because if_output would call + * arpresolve + */ + LLE_WUNLOCK(la); + ARP_STATADD(ARP_STAT_DFRSENT, n); + for (; m_hold != NULL; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_hold->m_nextpkt = NULL; + (*ifp->if_output)(ifp, m_hold, rt_getkey(rt), rt); + } + } else + LLE_WUNLOCK(la); + la = NULL; } reply: + if (la != NULL) { + LLE_WUNLOCK(la); + la = NULL; + } if (op != ARPOP_REQUEST) { if (op == ARPOP_REPLY) ARP_STATINC(ARP_STAT_RCVREPLY); @@ -1258,10 +1370,12 @@ reply: memcpy(tha, ar_sha(ah), ah->ar_hln); memcpy(ar_sha(ah), CLLADDR(ifp->if_sadl), ah->ar_hln); } else { - la = arplookup(m, &itaddr, 0, SIN_PROXY); + la = arplookup(ifp, m, &itaddr, 0, SIN_PROXY, 0, NULL); if (la == NULL) goto out; rt = la->la_rt; + LLE_RUNLOCK(la); + la = NULL; if (rt->rt_ifp->if_type == IFT_CARP && m->m_pkthdr.rcvif->if_type != IFT_CARP) goto out; @@ -1300,47 +1414,38 @@ reply: ARP_STAT_PUTREF(); (*ifp->if_output)(ifp, m, &sa, NULL); return; + out: + if (la != NULL) + LLE_WUNLOCK(la); m_freem(m); } /* * Free an arp entry. 
*/ -static void arptfree(struct llinfo_arp *la) +static void arptfree(struct llentry *la) { struct rtentry *rt = la->la_rt; - struct sockaddr_dl *sdl; - ARP_LOCK_CHECK(); + KASSERT(rt != NULL); - if (rt == NULL) - panic("arptfree"); - if (rt->rt_refcnt > 0 && (sdl = satosdl(rt->rt_gateway)) && - sdl->sdl_family == AF_LINK) { - sdl->sdl_alen = 0; - la->la_asked = 0; - rt->rt_flags &= ~RTF_REJECT; - return; + if (la->la_rt != NULL) { + rtfree(la->la_rt); + la->la_rt = NULL; } + rtrequest(RTM_DELETE, rt_getkey(rt), NULL, rt_mask(rt), 0, NULL); } -static struct llinfo_arp * -arplookup(struct mbuf *m, const struct in_addr *addr, int create, int proxy) -{ - return arplookup1(m, addr, create, proxy, NULL); -} - /* * Lookup or enter a new address in arptab. */ -static struct llinfo_arp * -arplookup1(struct mbuf *m, const struct in_addr *addr, int create, int proxy, - struct rtentry *rt0) +static struct llentry * +arplookup(struct ifnet *ifp, struct mbuf *m, const struct in_addr *addr, + int create, int proxy, int wlock, struct rtentry *rt0) { struct arphdr *ah; - struct ifnet *ifp = m->m_pkthdr.rcvif; struct rtentry *rt; struct sockaddr_inarp sin; const char *why = NULL; @@ -1364,8 +1469,22 @@ arplookup1(struct mbuf *m, const struct in_addr *addr, int create, int proxy, (__rt)->rt_gateway->sa_family == AF_LINK) - if (IS_LLINFO(rt)) - return (struct llinfo_arp *)rt->rt_llinfo; + if (IS_LLINFO(rt)) { + struct llentry *la; + int flags = wlock ? LLE_EXCLUSIVE : 0; + + if (create) { + IF_AFDATA_WLOCK(ifp); + la = lla_create(LLTABLE(ifp), flags, rt_getkey(rt)); + IF_AFDATA_WUNLOCK(ifp); + } else { + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), flags, rt_getkey(rt)); + IF_AFDATA_RUNLOCK(ifp); + } + + return la; + } if (create) { if (rt->rt_flags & RTF_GATEWAY) { @@ -1382,7 +1501,7 @@ arplookup1(struct mbuf *m, const struct in_addr *addr, int create, int proxy, lla_snprintf(ar_sha(ah), ah->ar_hln), (ifp) ? 
ifp->if_xname : "null", why); } - if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_CLONED) != 0) { + if ((rt->rt_flags & RTF_CLONED) != 0) { rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); } @@ -1997,6 +2116,12 @@ db_show_arptab(db_expr_t addr, bool have_addr, } #endif +void +arp_stat_add(int type, uint64_t count) +{ + ARP_STATADD(type, count); +} + static int sysctl_net_inet_arp_stats(SYSCTLFN_ARGS) { @@ -2023,13 +2148,6 @@ sysctl_net_inet_arp_setup(struct sysctllog **clog) sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_INT, "prune", - SYSCTL_DESCR("ARP cache pruning interval in seconds"), - NULL, 0, &arpt_prune, 0, - CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL); - - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "keep", SYSCTL_DESCR("Valid ARP entry lifetime in seconds"), NULL, 0, &arpt_keep, 0, @@ -2043,13 +2161,6 @@ sysctl_net_inet_arp_setup(struct sysctllog **clog) CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_INT, "refresh", - SYSCTL_DESCR("ARP entry refresh interval"), - NULL, 0, &arpt_refresh, 0, - CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL); - - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "stats", SYSCTL_DESCR("ARP statistics"), diff --git a/sys/netinet/in.c b/sys/netinet/in.c index 6a7d277..10b3e06 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -113,6 +113,7 @@ __KERNEL_RCSID(0, "$NetBSD: in.c,v 1.157 2015/08/24 22:21:26 pooka Exp $"); #include #include #include +#include #include @@ -120,7 +121,11 @@ __KERNEL_RCSID(0, "$NetBSD: in.c,v 1.157 2015/08/24 22:21:26 pooka Exp $"); #include #include +#include #include +#include +#include +#include #include #include @@ -129,6 +134,7 @@ __KERNEL_RCSID(0, "$NetBSD: in.c,v 1.157 2015/08/24 22:21:26 pooka Exp $"); #include #include #include +#include 
#include #include #include @@ -1529,6 +1535,326 @@ in_selectsrc(struct sockaddr_in *sin, struct route *ro, return satosin(&ia->ia_addr); } +struct in_llentry { + struct llentry base; +}; + +#define IN_LLTBL_DEFAULT_HSIZE 32 +#define IN_LLTBL_HASH(k, h) \ + (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) + +/* + * Do actual deallocation of @lle. + * Called by LLE_FREE_LOCKED when number of references + * drops to zero. + */ +static void +in_lltable_destroy_lle(struct llentry *lle) +{ + + LLE_WUNLOCK(lle); + LLE_LOCK_DESTROY(lle); + kmem_intr_free(lle, sizeof(*lle)); +} + +static struct llentry * +in_lltable_new(struct in_addr addr4, u_int flags) +{ + struct in_llentry *lle; + + lle = kmem_intr_zalloc(sizeof(*lle), KM_NOSLEEP); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + /* + * For IPv4 this will trigger "arpresolve" to generate + * an ARP request. + */ + lle->base.la_expire = time_uptime; /* mark expired */ + lle->base.r_l3addr.addr4 = addr4; + lle->base.lle_refcnt = 1; + lle->base.lle_free = in_lltable_destroy_lle; + LLE_LOCK_INIT(&lle->base); + callout_init(&lle->base.la_timer, CALLOUT_MPSAFE); + + return (&lle->base); +} + +/* The key is stored as copied from sin_addr, so compare without byte swapping. */ +#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ + ((((d).s_addr ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 ) + +static int +in_lltable_match_prefix(const struct sockaddr *prefix, + const struct sockaddr *mask, u_int flags, struct llentry *lle) +{ + const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix; + const struct sockaddr_in *msk = (const struct sockaddr_in *)mask; + + /* + * (flags & LLE_STATIC) means deleting all entries + * including static ARP entries.
+ */ + if (IN_ARE_MASKED_ADDR_EQUAL(lle->r_l3addr.addr4, pfx, msk) && + ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) + return (1); + + return (0); +} + +static void +in_lltable_free_entry(struct lltable *llt, struct llentry *lle) +{ + struct ifnet *ifp; + size_t pkts_dropped; + + LLE_WLOCK_ASSERT(lle); + KASSERT(llt != NULL); + + /* Unlink entry from table if not already */ + if ((lle->la_flags & LLE_LINKED) != 0) { + ifp = llt->llt_ifp; + IF_AFDATA_WLOCK_ASSERT(ifp); + lltable_unlink_entry(llt, lle); + } + + /* cancel timer */ + if (callout_stop(&lle->lle_timer)) + LLE_REMREF(lle); + + /* Drop hold queue */ + pkts_dropped = llentry_free(lle); + arp_stat_add(ARP_STAT_DFRDROPPED, (uint64_t)pkts_dropped); +} + +static int +in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) +{ + struct rtentry *rt; + int error = EINVAL; + + KASSERTMSG(l3addr->sa_family == AF_INET, + "sin_family %d", l3addr->sa_family); + + rt = rtalloc1(l3addr, 0); + if (rt == NULL) + return error; + + /* + * If the gateway for an existing host route matches the target L3 + * address, which is a special route inserted by some implementation + * such as MANET, and the interface is of the correct type, then + * allow for ARP to proceed. + */ + if (rt->rt_flags & RTF_GATEWAY) { + if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || + rt->rt_ifp->if_type != IFT_ETHER || +#ifdef __FreeBSD__ + (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || +#else + (rt->rt_ifp->if_flags & IFF_NOARP) != 0 || +#endif + memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, + sizeof(in_addr_t)) != 0) { + goto error; + } + } + + /* + * Make sure that at least the destination address is covered + * by the route. This is for handling the case where 2 or more + * interfaces have the same prefix. An incoming packet arrives + * on one interface and the corresponding outgoing packet leaves + * another interface.
+ */ + if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { + const char *sa, *mask, *addr, *lim; + int len; + + mask = (const char *)rt_mask(rt); + /* + * Just being extra cautious to avoid some custom + * code getting into trouble. + */ + if (mask == NULL) + goto error; + + sa = (const char *)rt_getkey(rt); + addr = (const char *)l3addr; + len = ((const struct sockaddr_in *)l3addr)->sin_len; + lim = addr + len; + + for ( ; addr < lim; sa++, mask++, addr++) { + if ((*sa ^ *addr) & *mask) { +#ifdef DIAGNOSTIC + log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", + inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); +#endif + goto error; + } + } + } + + error = 0; +error: + /* Release the reference obtained from rtalloc1() on every exit path. */ + rtfree(rt); + return error; +} + +static inline uint32_t +in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize) +{ + + return (IN_LLTBL_HASH(dst.s_addr, hsize)); +} + +static uint32_t +in_lltable_hash(const struct llentry *lle, uint32_t hsize) +{ + + return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize)); +} + +static void +in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) +{ + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sa; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr = lle->r_l3addr.addr4; +} + +static inline struct llentry * +in_lltable_find_dst(struct lltable *llt, struct in_addr dst) +{ + struct llentry *lle; + struct llentries *lleh; + u_int hashidx; + + hashidx = in_lltable_hash_dst(dst, llt->llt_hsize); + lleh = &llt->lle_head[hashidx]; + LIST_FOREACH(lle, lleh, lle_next) { + if (lle->la_flags & LLE_DELETED) + continue; + if (lle->r_l3addr.addr4.s_addr == dst.s_addr) + break; + } + + return (lle); +} + +static int +in_lltable_delete(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + + IF_AFDATA_WLOCK_ASSERT(ifp); +
KASSERTMSG(l3addr->sa_family == AF_INET, + "sin_family %d", l3addr->sa_family); + + lle = in_lltable_find_dst(llt, sin->sin_addr); + if (lle == NULL) { +#ifdef DIAGNOSTIC + log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle); +#endif + return (ENOENT); + } + + if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { + LLE_WLOCK(lle); + lle->la_flags |= LLE_DELETED; +#ifdef DIAGNOSTIC + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); +#endif + if ((lle->la_flags & (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) + llentry_free(lle); + else + LLE_WUNLOCK(lle); + } + + return (0); +} + +static struct llentry * +in_lltable_create(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + + IF_AFDATA_WLOCK_ASSERT(ifp); + KASSERTMSG(l3addr->sa_family == AF_INET, + "sin_family %d", l3addr->sa_family); + + lle = in_lltable_find_dst(llt, sin->sin_addr); + + if (lle != NULL) { + LLE_WLOCK(lle); + return (lle); + } + + /* no existing record, we need to create new one */ + + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. + */ + if (!(flags & LLE_IFADDR) && + in_lltable_rtcheck(ifp, flags, l3addr) != 0) + return (NULL); + + lle = in_lltable_new(sin->sin_addr, flags); + if (lle == NULL) { + log(LOG_INFO, "%s: new lle malloc failed\n", __func__); + return (NULL); + } + lle->la_flags = flags; + if ((flags & LLE_IFADDR) == LLE_IFADDR) { + memcpy(&lle->ll_addr, CLLADDR(ifp->if_sadl), ifp->if_addrlen); + lle->la_flags |= (LLE_VALID | LLE_STATIC); + } + + lltable_link_entry(llt, lle); + LLE_WLOCK(lle); + + return (lle); +} + +/* + * Return NULL if not found or marked for deletion. + * If found return lle read locked, or write locked if LLE_EXCLUSIVE is set.
+ */ +static struct llentry * +in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; + struct llentry *lle; + + IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); + KASSERTMSG(l3addr->sa_family == AF_INET, + "sin_family %d", l3addr->sa_family); + + lle = in_lltable_find_dst(llt, sin->sin_addr); + + if (lle == NULL) + return NULL; + + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + + return lle; +} + static void in_sysctl_init(struct sysctllog **clog) { @@ -1561,3 +1887,57 @@ in_sysctl_init(struct sysctllog **clog) CTL_NET, PF_INET, IPPROTO_IP, IPCTL_HOSTZEROBROADCAST, CTL_EOL); } + +static struct lltable * +in_lltattach(struct ifnet *ifp) +{ + struct lltable *llt; + + llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE); + llt->llt_af = AF_INET; + llt->llt_ifp = ifp; + + llt->llt_lookup = in_lltable_lookup; + llt->llt_create = in_lltable_create; + llt->llt_delete = in_lltable_delete; +#if 0 + llt->llt_dump_entry = in_lltable_dump_entry; +#endif + llt->llt_hash = in_lltable_hash; + llt->llt_fill_sa_entry = in_lltable_fill_sa_entry; + llt->llt_free_entry = in_lltable_free_entry; + llt->llt_match_prefix = in_lltable_match_prefix; + lltable_link(llt); + + return (llt); +} + +void * +in_domifattach(struct ifnet *ifp) +{ + struct in_ifinfo *ii; + + ii = kmem_zalloc(sizeof(struct in_ifinfo), KM_SLEEP); + KASSERT(ii != NULL); + + ii->ii_llt = in_lltattach(ifp); + +#ifdef IPSELSRC + ii->ii_selsrc = in_selsrc_domifattach(ifp); + KASSERT(ii->ii_selsrc != NULL); +#endif + + return ii; +} + +void +in_domifdetach(struct ifnet *ifp, void *aux) +{ + struct in_ifinfo *ii = aux; + +#ifdef IPSELSRC + in_selsrc_domifdetach(ifp, ii->ii_selsrc); +#endif + lltable_free(ii->ii_llt); + kmem_free(ii, sizeof(struct in_ifinfo)); +} diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 9f4e406..b8c8842 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@
-388,13 +388,8 @@ struct domain inetdomain = { .dom_maxrtkey = sizeof(struct ip_pack4), .dom_if_up = in_if_up, .dom_if_down = in_if_down, -#ifdef IPSELSRC .dom_ifattach = in_domifattach, .dom_ifdetach = in_domifdetach, -#else - .dom_ifattach = NULL, - .dom_ifdetach = NULL, -#endif .dom_if_link_state_change = in_if_link_state_change, .dom_ifqueues = { NULL, NULL }, .dom_link = { NULL }, diff --git a/sys/netinet/in_selsrc.c b/sys/netinet/in_selsrc.c index d95513b..9dacd7d 100644 --- a/sys/netinet/in_selsrc.c +++ b/sys/netinet/in_selsrc.c @@ -311,7 +311,8 @@ in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0) } ifp = ifa->ifa_ifp; - isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET]; + KASSERT(ifp->if_afdata[AF_INET] != NULL); + isc = ifp->if_afdata[AF_INET]->ii_selsrc; if (isc != NULL && isc->isc_selsrc != NULL && isc->isc_selsrc->iss_score_src[0] != NULL) iss = isc->isc_selsrc; @@ -544,7 +545,7 @@ err: } void * -in_domifattach(struct ifnet *ifp) +in_selsrc_domifattach(struct ifnet *ifp) { struct in_ifsysctl *isc; struct in_ifselsrc *iss; @@ -572,7 +573,7 @@ err: } void -in_domifdetach(struct ifnet *ifp, void *aux) +in_selsrc_domifdetach(struct ifnet *ifp, void *aux) { struct in_ifsysctl *isc; struct in_ifselsrc *iss; diff --git a/sys/netinet/in_selsrc.h b/sys/netinet/in_selsrc.h index 1fec73e..bac9377 100644 --- a/sys/netinet/in_selsrc.h +++ b/sys/netinet/in_selsrc.h @@ -27,4 +27,7 @@ enum in_category { struct ifaddr *in_getifa(struct ifaddr *, const struct sockaddr *); +void *in_selsrc_domifattach(struct ifnet *ifp); +void in_selsrc_domifdetach(struct ifnet *ifp, void *aux); + #endif /* _NETINET_IN_SELSRC_H */ diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 221ca8d..09b2651 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -204,7 +204,17 @@ extern const int inetctlerrmap[]; } \ (ia) = ifatoia(ifa); \ } -#endif + +#include +/* + * IPv4 per-interface state. 
+ */
+struct in_ifinfo {
+	struct lltable *ii_llt;	/* ARP state */
+	struct in_ifsysctl *ii_selsrc;	/* set by in_domifattach() only under IPSELSRC */
+};
+
+#endif /* _KERNEL */
 
 /*
  * Internet multicast address structure. There is one of these for each IP
@@ -313,6 +323,9 @@ ip_newid(const struct in_ifaddr *ia)
 int sysctl_inpcblist(SYSCTLFN_PROTO);
 #endif
 
+#define LLTABLE(ifp)	/* ifp's IPv4 lltable; no NULL check */ \
+	((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt
+
 #endif /* !_KERNEL */
 
 /* INET6 stuff */
diff --git a/sys/rump/librump/rumpkern/Makefile.rumpkern b/sys/rump/librump/rumpkern/Makefile.rumpkern
index ba00948..40c68bc 100644
--- a/sys/rump/librump/rumpkern/Makefile.rumpkern
+++ b/sys/rump/librump/rumpkern/Makefile.rumpkern
@@ -113,6 +113,7 @@ SRCS+= init_sysctl_base.c \
 	subr_pool.c \
 	subr_prf.c \
 	subr_pserialize.c \
+	kern_rwlock_obj.c \
 	subr_specificdata.c \
 	subr_time.c \
 	subr_vmem.c \
diff --git a/sys/rump/librump/rumpkern/rump.c b/sys/rump/librump/rumpkern/rump.c
index 85dde63..8875773 100644
--- a/sys/rump/librump/rumpkern/rump.c
+++ b/sys/rump/librump/rumpkern/rump.c
@@ -315,6 +315,7 @@ rump_init(void)
 	uao_init();
 
 	mutex_obj_init();
+	rw_obj_init();
 	callout_startup();
 
 	kprintf_init();
diff --git a/sys/rump/net/lib/libnet/Makefile b/sys/rump/net/lib/libnet/Makefile
index c1ec81c..f1f85937 100644
--- a/sys/rump/net/lib/libnet/Makefile
+++ b/sys/rump/net/lib/libnet/Makefile
@@ -10,6 +10,7 @@ IOCONF= NET.ioconf
 SRCS= if.c if_loop.c route.c rtsock.c raw_usrreq.c \
 	raw_cb.c if_media.c link_proto.c net_stats.c if_ethersubr.c
 SRCS+= if_43.c pfil.c
+SRCS+= if_llatbl.c
 SRCS+= net_component.c
 
 .include
diff --git a/sys/rump/net/lib/libnet/net_component.c b/sys/rump/net/lib/libnet/net_component.c
index 03f8179..780383b 100644
--- a/sys/rump/net/lib/libnet/net_component.c
+++ b/sys/rump/net/lib/libnet/net_component.c
@@ -35,6 +35,7 @@ __KERNEL_RCSID(0, "$NetBSD: net_component.c,v 1.3 2015/04/23 07:55:24 pooka Exp
 
 #include
 #include
+#include
 
 #include "rump_private.h"
 
@@ -45,6 +46,7 @@ RUMP_COMPONENT(RUMP_COMPONENT_NET)
 
 	ifinit1();
ifinit(); + lltableinit(); } RUMP_COMPONENT(RUMP_COMPONENT_NET_ROUTE)