commit 38063d0e19e0fdd2077952265b6acdf7e5fc9349
Author: Ryota Ozaki <ozaki-r@iij.ad.jp>
Date:   Tue Feb 16 15:35:41 2016 +0900

    Fix race condition on if_link_state_change softint

diff --git a/sys/net/if.c b/sys/net/if.c
index f047c00..edaa1bd 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -201,6 +201,7 @@ static int ifconf(u_long, void *);
 static int if_clone_create(const char *);
 static int if_clone_destroy(const char *);
 static void if_link_state_change_si(void *);
+static void if_link_state_change0(struct ifnet *, int, bool);
 
 struct if_percpuq {
 	struct ifnet	*ipq_ifp;
@@ -208,6 +209,15 @@ struct if_percpuq {
 	struct percpu	*ipq_ifqs;	/* struct ifqueue */
 };
 
+struct link_state_change {
+	SIMPLEQ_ENTRY(link_state_change)	lsc_entry;
+	int					lsc_state;
+	bool					lsc_lost;
+};
+
+static pool_cache_t link_state_change_pc __read_mostly;
+static int max_link_state_changes = 10;
+
 static struct mbuf *if_percpuq_dequeue(struct if_percpuq *);
 
 #if defined(INET) || defined(INET6)
@@ -256,6 +266,9 @@ ifinit(void)
 
 	/* interfaces are available, inform socket code */
 	ifioctl = doifioctl;
+
+	link_state_change_pc = pool_cache_init(sizeof(struct link_state_change),
+	    0, 0, 0, "if_link_state_change", NULL, IPL_NET, NULL, NULL, NULL);
 }
 
 /*
@@ -603,7 +616,10 @@ if_initialize(ifnet_t *ifp)
 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
 
 	ifp->if_link_state = LINK_STATE_UNKNOWN;
-	ifp->if_old_link_state = LINK_STATE_UNKNOWN;
+	ifp->if_link_state_pending = LINK_STATE_UNKNOWN;
+	ifp->if_link_state_lost = false;
+	ifp->if_link_state_change_count = 0;
+	SIMPLEQ_INIT(&ifp->if_link_state_changes);
 
 	ifp->if_capenable = 0;
 	ifp->if_csum_flags_tx = 0;
@@ -629,8 +645,9 @@ if_initialize(ifnet_t *ifp)
 
 	IF_AFDATA_LOCK_INIT(ifp);
 
-	ifp->if_link_si = softint_establish(SOFTINT_NET, if_link_state_change_si, ifp);
-	if (ifp->if_link_si == NULL)
+	ifp->if_link_state_si = softint_establish(SOFTINT_NET,
+	    if_link_state_change_si, ifp);
+	if (ifp->if_link_state_si == NULL)
 		panic("%s: softint_establish() failed", __func__);
 
 	if_getindex(ifp);
@@ -1051,8 +1068,14 @@ again:
 
 	IF_AFDATA_LOCK_DESTROY(ifp);
 
-	softint_disestablish(ifp->if_link_si);
-	ifp->if_link_si = NULL;
+	softint_disestablish(ifp->if_link_state_si);
+	ifp->if_link_state_si = NULL;
+	while (!SIMPLEQ_EMPTY(&ifp->if_link_state_changes)) {
+		struct link_state_change *lsc;
+		lsc = SIMPLEQ_FIRST(&ifp->if_link_state_changes);
+		SIMPLEQ_REMOVE_HEAD(&ifp->if_link_state_changes, lsc_entry);
+		pool_cache_put(link_state_change_pc, lsc);
+	}
 
 	/*
 	 * remove packets that came from ifp, from software interrupt queues.
@@ -1573,40 +1596,78 @@ if_link_state_change(struct ifnet *ifp, int link_state)
 	int s;
 
 	s = splnet();
-	if (ifp->if_link_state == link_state) {
-		splx(s);
-		return;
+	/*
+	 * Skip redundant change events. If there is no pending events,
+	 * if_link_state_pending equals to the current if_link_state.
+	 * If events are pending, if_link_state_pending indicates
+	 * a state of the latest event.
+	 */
+	if (ifp->if_link_state_pending == link_state)
+		goto out;
+
+	/* ??? */
+	if (ifp->if_link_state_pending != ifp->if_link_state) {
+		struct link_state_change *lsc;
+
+		/* We don't keep too many events to avoid DoS */
+		if (ifp->if_link_state_change_count >
+		    max_link_state_changes) {
+			ifp->if_link_state_lost = true;
+			goto change;
+		}
+
+		lsc = pool_cache_get(link_state_change_pc, PR_NOWAIT);
+		if (lsc == NULL) {
+			ifp->if_link_state_lost = true;
+			goto change;
+		}
+
+		lsc->lsc_state = link_state;
+		lsc->lsc_lost = ifp->if_link_state_lost;
+		SIMPLEQ_INSERT_TAIL(&ifp->if_link_state_changes, lsc,
+		    lsc_entry);
+		ifp->if_link_state_change_count++;
 	}
 
-	ifp->if_link_state = link_state;
-	softint_schedule(ifp->if_link_si);
-
+	ifp->if_link_state_lost = false;
+change:
+	ifp->if_link_state_pending = link_state;
+	softint_schedule(ifp->if_link_state_si);
+out:
 	splx(s);
 }
 
-
 static void
 if_link_state_change_si(void *arg)
 {
 	struct ifnet *ifp = arg;
+	struct link_state_change *lsc;
 	int s;
-	int link_state, old_link_state;
-	struct domain *dp;
 
 	s = splnet();
-	link_state = ifp->if_link_state;
-	old_link_state = ifp->if_old_link_state;
-	ifp->if_old_link_state = ifp->if_link_state;
+	while (!SIMPLEQ_EMPTY(&ifp->if_link_state_changes)) {
+		lsc = SIMPLEQ_FIRST(&ifp->if_link_state_changes);
+		SIMPLEQ_REMOVE_HEAD(&ifp->if_link_state_changes, lsc_entry);
+		if_link_state_change0(ifp, lsc->lsc_state, lsc->lsc_lost);
+		pool_cache_put(link_state_change_pc, lsc);
+		ifp->if_link_state_change_count--;
+	}
 
-#ifdef DEBUG
-	log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
-		link_state == LINK_STATE_UP ? "UP" :
-		link_state == LINK_STATE_DOWN ? "DOWN" :
-		"UNKNOWN",
-		 old_link_state == LINK_STATE_UP ? "UP" :
-		old_link_state == LINK_STATE_DOWN ? "DOWN" :
-		"UNKNOWN");
-#endif
+	/* Make sure the last state is reflected */
+	if (ifp->if_link_state_pending != ifp->if_link_state)
+		if_link_state_change0(ifp, ifp->if_link_state_pending,
+		    ifp->if_link_state_lost);
+	KASSERT(ifp->if_link_state == ifp->if_link_state_pending);
+
+	ifp->if_link_state_lost = false;
+	splx(s);
+}
+
+static void
+if_link_state_change0(struct ifnet *ifp, int link_state, bool lost)
+{
+	int old_link_state __debugused;
+	struct domain *dp;
 
 	/*
 	 * When going from UNKNOWN to UP, we need to mark existing
@@ -1617,9 +1678,8 @@ if_link_state_change_si(void *arg)
 	 * listeners would have an address and expect it to work right
 	 * away.
 	 */
-	if (link_state == LINK_STATE_UP &&
-	    old_link_state == LINK_STATE_UNKNOWN)
-	{
+	if (lost || (ifp->if_link_state == LINK_STATE_UP &&
+	    link_state == LINK_STATE_UNKNOWN)) {
 		DOMAIN_FOREACH(dp) {
 			if (dp->dom_if_link_state_change != NULL)
 				dp->dom_if_link_state_change(ifp,
@@ -1627,6 +1687,19 @@ if_link_state_change_si(void *arg)
 		}
 	}
 
+	old_link_state = ifp->if_link_state;
+	ifp->if_link_state = link_state;
+
+#ifdef DEBUG
+	log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
+		link_state == LINK_STATE_UP ? "UP" :
+		link_state == LINK_STATE_DOWN ? "DOWN" :
+		"UNKNOWN",
+		 old_link_state == LINK_STATE_UP ? "UP" :
+		old_link_state == LINK_STATE_DOWN ? "DOWN" :
+		"UNKNOWN");
+#endif
+
 	/* Notify that the link state has changed. */
 	rt_ifmsg(ifp);
 
@@ -1639,8 +1712,6 @@ if_link_state_change_si(void *arg)
 		if (dp->dom_if_link_state_change != NULL)
 			dp->dom_if_link_state_change(ifp, link_state);
 	}
-
-	splx(s);
 }
 
 /*
diff --git a/sys/net/if.h b/sys/net/if.h
index c9a0fe6..d39f794 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -248,6 +248,7 @@ struct bridge_iflist;
 struct callout;
 struct krwlock;
 struct if_percpuq;
+struct link_state_change;
 
 typedef struct ifnet {
 	void	*if_softc;		/* lower-level data for this if */
@@ -350,8 +351,12 @@ typedef struct ifnet {
 	struct callout *if_slowtimo_ch;
 	struct krwlock	*if_afdata_lock;
 	struct if_percpuq	*if_percpuq; /* We should remove it in the future */
-	void	*if_link_si;		/* softint to handle link state changes */
-	int	if_old_link_state;	/* previous link state */
+	void	*if_link_state_si;	/* softint to handle link state changes */
+	int	if_link_state_pending;	/* latest pending link state */
+	/* events of link state changes to the softint */
+	SIMPLEQ_HEAD(,link_state_change)     if_link_state_changes;
+	int	if_link_state_change_count; /* the number of pending events */
+	bool	if_link_state_lost;	/* if event(s) is lost or not */
 #endif
 } ifnet_t;