diff --git a/sys/arch/amd64/conf/ALL b/sys/arch/amd64/conf/ALL index 1bc3036..c79b457 100644 --- a/sys/arch/amd64/conf/ALL +++ b/sys/arch/amd64/conf/ALL @@ -125,6 +125,7 @@ options SYSCALL_STATS # per syscall counts options SYSCALL_TIMES # per syscall times options SYSCALL_TIMES_HASCOUNTER # use 'broken' rdtsc (soekris) options KDTRACE_HOOKS # kernel DTrace hooks +options PSREF_DEBUG # debug passive references # Compatibility options #options COMPAT_NOMID # NetBSD 0.8, 386BSD, and BSDI diff --git a/sys/arch/i386/conf/ALL b/sys/arch/i386/conf/ALL index 3524f8e..0faca2a 100644 --- a/sys/arch/i386/conf/ALL +++ b/sys/arch/i386/conf/ALL @@ -125,6 +125,7 @@ options SYSCALL_STATS # per syscall counts options SYSCALL_TIMES # per syscall times options SYSCALL_TIMES_HASCOUNTER # use 'broken' rdtsc (soekris) options KDTRACE_HOOKS # kernel DTrace hooks +options PSREF_DEBUG # debug passive references # Compatibility options options COMPAT_NOMID # NetBSD 0.8, 386BSD, and BSDI diff --git a/sys/conf/files b/sys/conf/files index ca3092f..d7c495c 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -284,6 +284,7 @@ defparam opt_kgdb.h KGDB_DEV KGDB_DEVNAME KGDB_DEVPORT defflag LOCKDEBUG defflag SYSCALL_DEBUG defflag opt_kstack.h KSTACK_CHECK_MAGIC +defflag opt_psref.h PSREF_DEBUG # memory (ram) disk options # diff --git a/sys/kern/files.kern b/sys/kern/files.kern index 4ff6a76..b21f43a 100644 --- a/sys/kern/files.kern +++ b/sys/kern/files.kern @@ -125,6 +125,7 @@ file kern/subr_pool.c kern file kern/subr_prf.c kern file kern/subr_prof.c kern file kern/subr_pserialize.c kern +file kern/subr_psref.c kern file kern/subr_specificdata.c kern file kern/subr_tftproot.c tftproot file kern/subr_time.c kern diff --git a/sys/kern/kern_softint.c b/sys/kern/kern_softint.c index 782540f..75f67d6 100644 --- a/sys/kern/kern_softint.c +++ b/sys/kern/kern_softint.c @@ -442,8 +442,8 @@ softint_disestablish(void *arg) KASSERT(sh->sh_func != NULL); flags |= sh->sh_flags; } - /* Neither pending nor active on all CPUs? */ - if ((flags & (SOFTINT_PENDING | SOFTINT_ACTIVE)) == 0) { + /* Inactive on all CPUs? */ + if ((flags & SOFTINT_ACTIVE) == 0) { break; } /* Oops, still active. Wait for it to clear. */ diff --git a/sys/kern/subr_psref.c b/sys/kern/subr_psref.c new file mode 100644 index 0000000..51c14eb --- /dev/null +++ b/sys/kern/subr_psref.c @@ -0,0 +1,513 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2016 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Taylor R. Campbell. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Passive references + * + * Passive references are references to objects that guarantee the + * object will not be destroyed until the reference is released. + * + * Passive references require no interprocessor synchronization to + * acquire or release. However, destroying the target of passive + * references requires expensive interprocessor synchronization -- + * xcalls to determine on which CPUs the object is still in use. + * + * Passive references may be held only on a single CPU and by a + * single LWP. They require the caller to allocate a little stack + * space, a struct psref object. Sleeping while a passive + * reference is held is allowed, provided that the owner's LWP is + * bound to a CPU -- e.g., the owner is a softint or a bound + * kthread. However, sleeping should be kept to a short duration, + * e.g. sleeping on an adaptive lock. + * + * Passive references serve as an intermediate stage between + * reference counting and passive serialization (pserialize(9)): + * + * - If you need references to transfer from CPU to CPU or LWP to + * LWP, or if you need long-term references, you must use + * reference counting, e.g. with atomic operations or locks, + * which incurs interprocessor synchronization for every use -- + * cheaper than an xcall, but not scalable. + * + * - If all users *guarantee* that they will not sleep, then it is + * not necessary to use passive references: you may as well just + * use the even cheaper pserialize(9), because you have + * satisfied the requirements of a pserialize read section. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef PSREF_DEBUG +#define __psref_debugused +#else +#define __psref_debugused __unused +#endif + +LIST_HEAD(psref_head, psref); + +/* + * struct psref_class + * + * Private global state for a class of passive reference targets. + * Opaque to callers. + */ +struct psref_class { + kmutex_t prc_lock; + kcondvar_t prc_cv; + struct percpu *prc_percpu; /* struct psref_cpu */ + ipl_cookie_t prc_iplcookie; +}; + +/* + * struct psref_cpu + * + * Private per-CPU state for a class of passive reference targets. + * Not exposed by the API. + */ +struct psref_cpu { + struct psref_head pcpu_head; +}; + +/* + * psref_class_create(name, ipl) + * + * Create a new passive reference class, with the given wchan name + * and ipl. 
+ */ +struct psref_class * +psref_class_create(const char *name, int ipl) +{ + struct psref_class *class; + + ASSERT_SLEEPABLE(); + + class = kmem_alloc(sizeof(*class), KM_SLEEP); + if (class == NULL) + goto fail0; + + class->prc_percpu = percpu_alloc(sizeof(struct psref_cpu)); + if (class->prc_percpu == NULL) + goto fail1; + + mutex_init(&class->prc_lock, MUTEX_DEFAULT, ipl); + cv_init(&class->prc_cv, name); + class->prc_iplcookie = makeiplcookie(ipl); + + return class; + +fail1: kmem_free(class, sizeof(*class)); +fail0: return NULL; +} + +#ifdef DIAGNOSTIC +static void +psref_cpu_drained_p(void *p, void *cookie, struct cpu_info *ci __unused) +{ + struct psref_cpu *pcpu = p; + bool *retp = cookie; + + if (!LIST_EMPTY(&pcpu->pcpu_head)) + *retp = false; +} + +static bool +psref_class_drained_p(struct psref_class *prc) +{ + bool ret = true; + + percpu_foreach(prc->prc_percpu, &psref_cpu_drained_p, &ret); + + return ret; +} +#endif /* DIAGNOSTIC */ + +/* + * psref_class_destroy(class) + * + * Destroy a passive reference class and free memory associated + * with it. All targets in this class must have been drained and + * destroyed already. + */ +void +psref_class_destroy(struct psref_class *class) +{ + + KASSERT(psref_class_drained_p(class)); + + cv_destroy(&class->prc_cv); + mutex_destroy(&class->prc_lock); + percpu_free(class->prc_percpu, sizeof(struct psref_cpu)); + kmem_free(class, sizeof(*class)); +} + +/* + * psref_target_init(target, class) + * + * Initialize a passive reference target in the specified class. + * The caller is responsible for issuing a membar_producer after + * psref_target_init and before exposing a pointer to the target + * to other CPUs. + */ +void +psref_target_init(struct psref_target *target, + struct psref_class *class __psref_debugused) +{ + +#ifdef PSREF_DEBUG + target->prt_class = class; +#endif + target->prt_draining = false; +} + +/* + * psref_acquire(psref, target, class) + * + * Acquire a passive reference to the specified target, which must + * be in the specified class. + * + * The caller must guarantee that the target will not be destroyed + * before psref_acquire returns. + * + * The caller must additionally guarantee that it will not switch + * CPUs before releasing the passive reference, either by + * disabling kpreemption and avoiding sleeps, or by being in a + * softint or in an LWP bound to a CPU. + */ +void +psref_acquire(struct psref *psref, struct psref_target *target, + struct psref_class *class) +{ + struct psref_cpu *pcpu; + int s; + + KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() || + ISSET(curlwp->l_pflag, LP_BOUND)), + "passive references are CPU-local," + " but preemption is enabled and the caller is not" + " in a softint or CPU-bound LWP"); + +#ifdef PSREF_DEBUG + KASSERTMSG((target->prt_class == class), + "mismatched psref target class: %p (ref) != %p (expected)", + target->prt_class, class); +#endif + KASSERTMSG(!target->prt_draining, "psref target already destroyed: %p", + target); + + /* Block interrupts and acquire the current CPU's reference list. */ + s = splraiseipl(class->prc_iplcookie); + pcpu = percpu_getref(class->prc_percpu); + + /* Record our reference. */ + LIST_INSERT_HEAD(&pcpu->pcpu_head, psref, psref_entry); + psref->psref_target = target; +#ifdef PSREF_DEBUG + psref->psref_lwp = curlwp; + psref->psref_cpu = curcpu(); +#endif + + /* Release the CPU list and restore interrupts. 
*/ + percpu_putref(class->prc_percpu); + splx(s); +} + +/* + * psref_release(psref, target, class) + * + * Release a passive reference to the specified target, which must + * be in the specified class. + * + * The caller must not have switched CPUs or LWPs since acquiring + * the passive reference. + */ +void +psref_release(struct psref *psref, struct psref_target *target, + struct psref_class *class) +{ + int s; + + KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() || + ISSET(curlwp->l_pflag, LP_BOUND)), + "passive references are CPU-local," + " but preemption is enabled and the caller is not" + " in a softint or CPU-bound LWP"); + +#ifdef PSREF_DEBUG + KASSERTMSG((target->prt_class == class), + "mismatched psref target class: %p (ref) != %p (expected)", + target->prt_class, class); +#endif + + /* Make sure the psref looks sensible. */ + KASSERTMSG((psref->psref_target == target), + "passive reference target mismatch: %p (ref) != %p (expected)", + psref->psref_target, target); +#ifdef PSREF_DEBUG + KASSERTMSG((psref->psref_lwp == curlwp), + "passive reference transferred from lwp %p to lwp %p", + psref->psref_lwp, curlwp); + KASSERTMSG((psref->psref_cpu == curcpu()), + "passive reference transferred from CPU %u to CPU %u", + cpu_index(psref->psref_cpu), cpu_index(curcpu())); +#endif + + /* + * Block interrupts and remove the psref from the current CPU's + * list. No need to percpu_getref or get the head of the list, + * and the caller guarantees that we are bound to a CPU anyway + * (as does blocking interrupts). + */ + s = splraiseipl(class->prc_iplcookie); + LIST_REMOVE(psref, psref_entry); + splx(s); + + /* If someone is waiting for users to drain, notify 'em. */ + if (__predict_false(target->prt_draining)) + cv_broadcast(&class->prc_cv); +} + +/* + * psref_copy(pto, pfrom class) + * + * Copy a passive reference from pfrom, which must be in the + * specified class, to pto. Both pfrom and pto must later be + * released with psref_release. + * + * The caller must not have switched CPUs or LWPs since acquiring + * pfrom, and must not switch CPUs or LWPs before releasing both + * pfrom and pto. + */ +void +psref_copy(struct psref *pto, const struct psref *pfrom, + struct psref_class *class) +{ + struct psref_cpu *pcpu; + int s; + + KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() || + ISSET(curlwp->l_pflag, LP_BOUND)), + "passive references are CPU-local," + " but preemption is enabled and the caller is not" + " in a softint or CPU-bound LWP"); + KASSERTMSG((pto != pfrom), + "can't copy passive reference to itself: %p", + pto); + +#ifdef PSREF_DEBUG + /* Make sure the pfrom reference looks sensible. */ + KASSERTMSG((pfrom->psref_lwp == curlwp), + "passive reference transferred from lwp %p to lwp %p", + pfrom->psref_lwp, curlwp); + KASSERTMSG((pfrom->psref_cpu == curcpu()), + "passive reference transferred from CPU %u to CPU %u", + cpu_index(pfrom->psref_cpu), cpu_index(curcpu())); + KASSERTMSG((pfrom->psref_target->prt_class == class), + "mismatched psref target class: %p (ref) != %p (expected)", + pfrom->psref_target->prt_class, class); +#endif + + /* Block interrupts and acquire the current CPU's reference list. */ + s = splraiseipl(class->prc_iplcookie); + pcpu = percpu_getref(class->prc_percpu); + + /* Record the new reference. */ + LIST_INSERT_HEAD(&pcpu->pcpu_head, pto, psref_entry); + pto->psref_target = pfrom->psref_target; +#ifdef PSREF_DEBUG + pto->psref_lwp = curlwp; + pto->psref_cpu = curcpu(); +#endif + + /* Release the CPU list and restore interrupts. 
*/ + percpu_putref(class->prc_percpu); + splx(s); +} + +/* + * struct psreffed + * + * Global state for draining a psref target. + */ +struct psreffed { + struct psref_class *class; + struct psref_target *target; + bool ret; +}; + +static void +psreffed_p_xc(void *cookie0, void *cookie1 __unused) +{ + struct psreffed *P = cookie0; + + /* + * If we hold a psref to the target, then answer true. + * + * This is the only dynamic decision that may be made with + * psref_held. + * + * No need to lock anything here: every write transitions from + * false to true, so there can be no conflicting writes. No + * need for a memory barrier here because P->ret is read only + * after xc_wait, which has already issued any necessary memory + * barriers. + */ + if (psref_held(P->target, P->class)) + P->ret = true; +} + +static bool +psreffed_p(struct psref_target *target, struct psref_class *class) +{ + struct psreffed P = { + .class = class, + .target = target, + .ret = false, + }; + + /* Ask all CPUs to say whether they hold a psref to the target. */ + xc_wait(xc_broadcast(0, &psreffed_p_xc, &P, NULL)); + + return P.ret; +} + +/* + * psref_target_destroy(target, class) + * + * Destroy a passive reference target. Waits for all existing + * references to drain. Caller must guarantee no new references + * will be acquired once it calls psref_target_destroy, e.g. by + * removing the target from a global list first. May sleep. + */ +void +psref_target_destroy(struct psref_target *target, struct psref_class *class) +{ + + ASSERT_SLEEPABLE(); + +#ifdef PSREF_DEBUG + KASSERTMSG((target->prt_class == class), + "mismatched psref target class: %p (ref) != %p (expected)", + target->prt_class, class); +#endif + + /* Request psref_release to notify us when done. */ + KASSERTMSG(!target->prt_draining, "psref target already destroyed: %p", + target); + target->prt_draining = true; + + /* Wait until there are no more references on any CPU. */ + while (psreffed_p(target, class)) { + /* + * This enter/wait/exit business looks wrong, but it is + * both necessary, because psreffed_p performs a + * low-priority xcall and hence cannot run while a + * mutex is locked, and OK, because the wait is timed + * -- explicit wakeups are only an optimization. + */ + mutex_enter(&class->prc_lock); + (void)cv_timedwait(&class->prc_cv, &class->prc_lock, 1); + mutex_exit(&class->prc_lock); + } + +#ifdef PSREF_DEBUG + /* No more references. Cause subsequent psref_acquire to kassert. */ + target->prt_class = NULL; +#endif +} + +/* + * psref_held(target, class) + * + * True if the current CPU holds a passive reference to target, + * false otherwise. May be used only inside assertions. + */ +bool +psref_held(struct psref_target *target, struct psref_class *class) +{ + struct psref_cpu *pcpu; + struct psref *psref; + int s; + bool held = false; + + KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() || + ISSET(curlwp->l_pflag, LP_BOUND)), + "passive references are CPU-local," + " but preemption is enabled and the caller is not" + " in a softint or CPU-bound LWP"); + +#ifdef PSREF_DEBUG + KASSERTMSG((target->prt_class == class), + "mismatched psref target class: %p (ref) != %p (expected)", + target->prt_class, class); +#endif + + /* Block interrupts and acquire the current CPU's reference list. */ + s = splraiseipl(class->prc_iplcookie); + pcpu = percpu_getref(class->prc_percpu); + + /* Search through all the references on this CPU. */ + LIST_FOREACH(psref, &pcpu->pcpu_head, psref_entry) { +#ifdef PSREF_DEBUG + /* Sanity-check the reference. 
*/ + KASSERTMSG((psref->psref_lwp == curlwp), + "passive reference transferred from lwp %p to lwp %p", + psref->psref_lwp, curlwp); + KASSERTMSG((psref->psref_cpu == curcpu()), + "passive reference transferred from CPU %u to CPU %u", + cpu_index(psref->psref_cpu), cpu_index(curcpu())); +#endif + + /* If it matches, stop here and answer yes. */ + if (psref->psref_target == target) { + held = true; + break; + } + } + + /* Release the CPU list and restore interrupts. */ + percpu_putref(class->prc_percpu); + splx(s); + + return held; +} diff --git a/sys/net/if.c b/sys/net/if.c index 2633c72..32c19fc 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -94,6 +94,7 @@ __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.325 2016/02/19 20:05:43 roy Exp $"); #if defined(_KERNEL_OPT) #include "opt_inet.h" +#include "opt_ipsec.h" #include "opt_atalk.h" #include "opt_natm.h" @@ -138,6 +139,9 @@ __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.325 2016/02/19 20:05:43 roy Exp $"); #include #include #include +#ifndef IPSEC +#include +#endif #ifdef INET6 #include @@ -251,6 +255,10 @@ ifinit(void) sysctl_net_pktq_setup(NULL, PF_INET6); #endif +#ifndef IPSEC + encapinit(); +#endif + if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, if_listener_cb, NULL); diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 83afafe..1ce2332 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -53,6 +53,7 @@ __KERNEL_RCSID(0, "$NetBSD: if_gif.c,v 1.106 2016/02/26 07:35:17 knakahara Exp $ #include #include #include +#include #include #include @@ -100,6 +101,7 @@ static int gif_check_nesting(struct ifnet *, struct mbuf *); static int gif_encap_attach(struct gif_softc *); static int gif_encap_detach(struct gif_softc *); +static void gif_encap_pause(struct gif_softc *); static struct if_clone gif_cloner = IF_CLONE_INITIALIZER("gif", gif_clone_create, gif_clone_destroy); @@ -217,7 +219,8 @@ gif_encapcheck(struct mbuf *m, int off, int proto, void *arg) if (sc == NULL) return 0; - if ((sc->gif_if.if_flags & IFF_UP) == 0) + if ((sc->gif_if.if_flags & (IFF_UP|IFF_RUNNING)) + != (IFF_UP|IFF_RUNNING)) return 0; /* no physical address */ @@ -321,9 +324,8 @@ gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, } m->m_flags &= ~(M_BCAST|M_MCAST); - if (!(ifp->if_flags & IFF_UP) || - sc->gif_psrc == NULL || sc->gif_pdst == NULL || - sc->gif_si == NULL) { + if (!(ifp->if_flags & IFF_UP) || /* check IFF_RUNNING later */ + sc->gif_psrc == NULL || sc->gif_pdst == NULL) { m_freem(m); error = ENETDOWN; goto end; @@ -344,6 +346,17 @@ gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, m->m_pkthdr.csum_data = 0; s = splnet(); + /* + * This if_flags check, IFQ_ENQUEUE and softint_schedule() are required + * to be done atomically in the local CPU, because this local CPU must + * let gif_encap_pause() wait until softint_schedule() completion. + */ + if (!(ifp->if_flags & IFF_RUNNING)) { + splx(s); + m_freem(m); + error = ENETDOWN; + goto end; + } IFQ_ENQUEUE(&ifp->if_snd, m, &pktattr, error); if (error) { splx(s); @@ -376,15 +389,6 @@ gifintr(void *arg) sc = arg; ifp = &sc->gif_if; - /* - * other CPUs does {set,delete}_tunnel after curcpu have done - * softint_schedule(). 
- */ - if (sc->gif_pdst == NULL || sc->gif_psrc == NULL) { - IFQ_PURGE(&ifp->if_snd); - return; - } - /* output processing */ while (1) { s = splnet(); @@ -776,6 +780,46 @@ gif_encap_detach(struct gif_softc *sc) return error; } +static void +gif_encap_pause(struct gif_softc *sc) +{ + struct ifnet *ifp; + uint64_t where; + + if (sc == NULL || sc->gif_psrc == NULL) + return; + + ifp = &sc->gif_if; + if ((ifp->if_flags & IFF_RUNNING) == 0) + return; + + switch (sc->gif_psrc->sa_family) { +#ifdef INET + case AF_INET: + (void)in_gif_pause(sc); + break; +#endif +#ifdef INET6 + case AF_INET6: + (void)in6_gif_pause(sc); + break; +#endif + } + + ifp->if_flags &= ~IFF_RUNNING; + /* membar_sync() is done in xc_broadcast(). */ + + /* + * Wait for softint_schedule() completion done by other CPUs which + * already run over if_flags check in gif_output(). + * In addition, wait for softint_execute()(ipintr() or ip6intr()) + * completion done by other CPUs which already run over if_flags + * check in in_gif_input() or in6_gif_input(). + */ + where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); + xc_wait(where); +} + int gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { @@ -783,11 +827,13 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) struct gif_softc *sc2; struct sockaddr *osrc, *odst; struct sockaddr *nsrc, *ndst; - void *osi; int s; int error; s = splsoftnet(); + error = encap_lock_enter(); + if (error) + return error; LIST_FOREACH(sc2, &gif_softc_list, gif_list) { if (sc2 == sc) @@ -798,6 +844,7 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) if (sockaddr_cmp(sc2->gif_pdst, dst) == 0 && sockaddr_cmp(sc2->gif_psrc, src) == 0) { /* continue to use the old configureation. */ + encap_lock_exit(); splx(s); return EADDRNOTAVAIL; } @@ -806,42 +853,29 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) } if ((nsrc = sockaddr_dup(src, M_WAITOK)) == NULL) { + encap_lock_exit(); splx(s); return ENOMEM; } if ((ndst = sockaddr_dup(dst, M_WAITOK)) == NULL) { sockaddr_free(nsrc); + encap_lock_exit(); splx(s); return ENOMEM; } + gif_encap_pause(sc); + /* + * At this point, gif_output() does not softint_schedule() any more. + * Furthermore, all of gif_output() has completed. It promises not to + * call softint_schedule() anymore, so we can call + * softint_disestablish() now. + */ + /* Firstly, clear old configurations. */ if (sc->gif_si) { - osrc = sc->gif_psrc; - odst = sc->gif_pdst; - osi = sc->gif_si; - sc->gif_psrc = NULL; - sc->gif_pdst = NULL; + softint_disestablish(sc->gif_si); sc->gif_si = NULL; - /* - * At this point, gif_output() does not softint_schedule() - * any more. However, there are below 2 fears of other CPUs - * which would cause panic because of the race between - * softint_execute() and softint_disestablish(). 
- * (a) gif_output() has done softint_schedule(), and softint - * (gifintr()) is waiting for execution - * => This pattern is avoided by waiting SOFTINT_PENDING - * CPUs in softint_disestablish() - * (b) gifintr() is already running - * => This pattern is avoided by waiting SOFTINT_ACTIVE - * CPUs in softint_disestablish() - */ - - softint_disestablish(osi); - sc->gif_psrc = osrc; - sc->gif_pdst = odst; - osrc = NULL; - odst = NULL; } /* XXX we can detach from both, but be polite just in case */ if (sc->gif_psrc) @@ -900,6 +934,7 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) else ifp->if_flags &= ~IFF_RUNNING; + encap_lock_exit(); splx(s); return error; } @@ -908,24 +943,18 @@ void gif_delete_tunnel(struct ifnet *ifp) { struct gif_softc *sc = ifp->if_softc; - struct sockaddr *osrc, *odst; - void *osi; int s; + int error; s = splsoftnet(); + error = encap_lock_enter(); + if (error) + return; + gif_encap_pause(sc); if (sc->gif_si) { - osrc = sc->gif_psrc; - odst = sc->gif_pdst; - osi = sc->gif_si; - - sc->gif_psrc = NULL; - sc->gif_pdst = NULL; + softint_disestablish(sc->gif_si); sc->gif_si = NULL; - - softint_disestablish(osi); - sc->gif_psrc = osrc; - sc->gif_pdst = odst; } if (sc->gif_psrc) { sockaddr_free(sc->gif_psrc); @@ -947,5 +976,7 @@ gif_delete_tunnel(struct ifnet *ifp) ifp->if_flags |= IFF_RUNNING; else ifp->if_flags &= ~IFF_RUNNING; + + encap_lock_exit(); splx(s); } diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 3ac7dd1..0139e76 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -190,18 +190,27 @@ static int stf_clone_create(struct if_clone *ifc, int unit) { struct stf_softc *sc; + int error; + + sc = malloc(sizeof(struct stf_softc), M_DEVBUF, M_WAIT|M_ZERO); + if_initname(&sc->sc_if, ifc->ifc_name, unit); + + error = encap_lock_enter(); + if (error) { + free(sc, M_DEVBUF); + return error; + } if (LIST_FIRST(&stf_softc_list) != NULL) { /* Only one stf interface is allowed. 
*/ + encap_lock_exit(); + free(sc, M_DEVBUF); return (EEXIST); } - sc = malloc(sizeof(struct stf_softc), M_DEVBUF, M_WAIT|M_ZERO); - - if_initname(&sc->sc_if, ifc->ifc_name, unit); - sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, &in_stf_encapsw, sc); + encap_lock_exit(); if (sc->encap_cookie == NULL) { printf("%s: unable to attach encap\n", if_name(&sc->sc_if)); free(sc, M_DEVBUF); @@ -226,8 +235,10 @@ stf_clone_destroy(struct ifnet *ifp) { struct stf_softc *sc = (void *) ifp; + encap_lock_enter(); LIST_REMOVE(sc, sc_list); encap_detach(sc->encap_cookie); + encap_lock_exit(); bpf_detach(ifp); if_detach(ifp); rtcache_free(&sc->sc_ro); diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 35f6116..526979f 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -204,7 +204,8 @@ in_gif_input(struct mbuf *m, int off, int proto) gifp = (struct ifnet *)encap_getarg(m); - if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) { + if (gifp == NULL || (gifp->if_flags & (IFF_UP|IFF_RUNNING)) + != (IFF_UP|IFF_RUNNING)) { m_freem(m); ip_statinc(IP_STAT_NOGIF); return; @@ -384,11 +385,21 @@ in_gif_detach(struct gif_softc *sc) { int error; + error = in_gif_pause(sc); + + rtcache_free(&sc->gif_ro); + + return error; +} + +int +in_gif_pause(struct gif_softc *sc) +{ + int error; + error = encap_detach(sc->encap_cookie4); if (error == 0) sc->encap_cookie4 = NULL; - rtcache_free(&sc->gif_ro); - return error; } diff --git a/sys/netinet/in_gif.h b/sys/netinet/in_gif.h index 1107ee8..654b71c 100644 --- a/sys/netinet/in_gif.h +++ b/sys/netinet/in_gif.h @@ -45,5 +45,6 @@ int gif_encapcheck4(struct mbuf *, int, int, void *); #endif int in_gif_attach(struct gif_softc *); int in_gif_detach(struct gif_softc *); +int in_gif_pause(struct gif_softc *); #endif /* !_NETINET_IN_GIF_H_ */ diff --git a/sys/netinet/ip_encap.c b/sys/netinet/ip_encap.c index 128ee75..baa81a1 100644 --- a/sys/netinet/ip_encap.c +++ b/sys/netinet/ip_encap.c @@ -58,13 +58,18 @@ /* XXX is M_NETADDR correct? */ /* - * The code will use radix table for tunnel lookup, for + * With USE_RADIX the code will use radix table for tunnel lookup, for * tunnels registered with encap_attach() with a addr/mask pair. * Faster on machines with thousands of tunnel registerations (= interfaces). * * The code assumes that radix table code can handle non-continuous netmask, * as it will pass radix table memory region with (src + dst) sockaddr pair. */ +/* XXX future work + * eliminate linear search of encap interfaces. It must fix the many encap + * interface scaling issue without reducing computation by radix tree. 
+ */ +#undef USE_RADIX #include __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.52 2016/02/26 07:35:17 knakahara Exp $"); @@ -82,6 +87,9 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.52 2016/02/26 07:35:17 knakahara Exp #include #include #include +#include +#include +#include #include #include @@ -109,21 +117,65 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.52 2016/02/26 07:35:17 knakahara Exp enum direction { INBOUND, OUTBOUND }; #ifdef INET -static struct encaptab *encap4_lookup(struct mbuf *, int, int, enum direction); +static struct encaptab *encap4_lookup(struct mbuf *, int, int, enum direction, + struct psref *); #endif #ifdef INET6 -static struct encaptab *encap6_lookup(struct mbuf *, int, int, enum direction); +static struct encaptab *encap6_lookup(struct mbuf *, int, int, enum direction, + struct psref *); #endif static int encap_add(struct encaptab *); static int encap_remove(struct encaptab *); static int encap_afcheck(int, const struct sockaddr *, const struct sockaddr *); +#ifdef USE_RADIX static struct radix_node_head *encap_rnh(int); static int mask_matchlen(const struct sockaddr *); +#else +static int mask_match(const struct encaptab *, const struct sockaddr *, + const struct sockaddr *); +#endif static void encap_fillarg(struct mbuf *, const struct encaptab *); -LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); - +/* + * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking + * encap_table. So, it cannot use pserialize_read_enter() + */ +static struct { + LIST_HEAD(, encaptab) list; + pserialize_t psz; + struct psref_class *elem_class; /* for the element of et_list */ +} encaptab __cacheline_aligned = { + .list = LIST_HEAD_INITIALIZER(&encap_table), +}; +#define encap_table encaptab.list + +static struct { + kmutex_t lock; + kcondvar_t cv; + struct lwp *busy; +} encap_whole __cacheline_aligned; + +#ifdef USE_RADIX struct radix_node_head *encap_head[2]; /* 0 for AF_INET, 1 for AF_INET6 */ +static bool encap_head_updating = false; +#endif + +/* + * must be done before other encap interfaces initialization. + */ +void +encapinit(void) +{ + + encaptab.psz = pserialize_create(); + encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET); + if (encaptab.elem_class == NULL) + panic("encaptab.elem_class cannot be allocated.\n"); + + mutex_init(&encap_whole.lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&encap_whole.cv, "ip_encap cv"); + encap_whole.busy = NULL; +} void encap_init(void) @@ -141,9 +193,10 @@ encap_init(void) * initialization - using LIST_INIT() here can nuke encap_attach() * from drivers. */ - LIST_INIT(&encaptab); + LIST_INIT(&encap_table); #endif +#ifdef USE_RADIX /* * initialize radix lookup table when the radix subsystem is inited. */ @@ -153,18 +206,23 @@ encap_init(void) rn_delayedinit((void *)&encap_head[1], sizeof(struct sockaddr_pack) << 3); #endif +#endif } #ifdef INET static struct encaptab * -encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir) +encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir, + struct psref *match_psref) { struct ip *ip; struct ip_pack4 pack; struct encaptab *ep, *match; int prio, matchprio; + int s; +#ifdef USE_RADIX struct radix_node_head *rnh = encap_rnh(AF_INET); struct radix_node *rn; +#endif KASSERT(m->m_len >= sizeof(*ip)); @@ -185,22 +243,54 @@ encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir) match = NULL; matchprio = 0; + s = pserialize_read_enter(); +#ifdef USE_RADIX + /* Check whether there's and update in progress. 
*/ + if (encap_head_updating) { + /* + * Update in progress. Pretend there are no tunnels + */ + pserialize_read_exit(s); + return NULL; + } rn = rnh->rnh_matchaddr((void *)&pack, rnh); if (rn && (rn->rn_flags & RNF_ROOT) == 0) { - match = (struct encaptab *)rn; + struct encaptab *encapp = (struct encaptab *)rn; + + psref_acquire(match_psref, &encapp->psref, + encaptab.elem_class); + match = encapp; matchprio = mask_matchlen(match->srcmask) + - mask_matchlen(match->dstmask); + mask_matchlen(match->dstmask); } +#endif + LIST_FOREACH(ep, &encap_table, chain) { + struct psref elem_psref; + + membar_datadep_consumer(); - LIST_FOREACH(ep, &encaptab, chain) { if (ep->af != AF_INET) continue; if (ep->proto >= 0 && ep->proto != proto) continue; - if (ep->func) + + psref_acquire(&elem_psref, &ep->psref, + encaptab.elem_class); + if (ep->func) { + pserialize_read_exit(s); + /* XXXX ep->func is sleepable. */ prio = (*ep->func)(m, off, proto, ep->arg); - else + s = pserialize_read_enter(); + } else { +#ifdef USE_RADIX + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); continue; +#else + prio = mask_match(ep, (struct sockaddr *)&pack.mine, + (struct sockaddr *)&pack.yours); +#endif + } /* * We prioritize the matches by using bit length of the @@ -223,13 +313,30 @@ encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir) * For radix-based lookup, I guess source takes precedence. * See rn_{refines,lexobetter} for the correct answer. */ - if (prio <= 0) + if (prio <= 0) { + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); continue; + } if (prio > matchprio) { + /* release last matched ep */ + if (match != NULL) + psref_release(match_psref, &match->psref, + encaptab.elem_class); + + psref_copy(match_psref, &elem_psref, + encaptab.elem_class); matchprio = prio; match = ep; } + KASSERTMSG((match == NULL) || psref_held(&match->psref, + encaptab.elem_class), + "current match = %p, but not hold its psref", match); + + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); } + pserialize_read_exit(s); return match; } @@ -241,22 +348,27 @@ encap4_input(struct mbuf *m, ...) va_list ap; const struct encapsw *esw; struct encaptab *match; + struct psref match_psref; va_start(ap, m); off = va_arg(ap, int); proto = va_arg(ap, int); va_end(ap); - match = encap4_lookup(m, off, proto, INBOUND); - + match = encap4_lookup(m, off, proto, INBOUND, &match_psref); if (match) { /* found a match, "match" has the best one */ esw = match->esw; if (esw && esw->encapsw4.pr_input) { encap_fillarg(m, match); (*esw->encapsw4.pr_input)(m, off, proto); - } else + psref_release(&match_psref, &match->psref, + encaptab.elem_class); + } else { + psref_release(&match_psref, &match->psref, + encaptab.elem_class); m_freem(m); + } return; } @@ -267,14 +379,18 @@ encap4_input(struct mbuf *m, ...) #ifdef INET6 static struct encaptab * -encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir) +encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir, + struct psref *match_psref) { struct ip6_hdr *ip6; struct ip_pack6 pack; int prio, matchprio; + int s; struct encaptab *ep, *match; +#ifdef USE_RADIX struct radix_node_head *rnh = encap_rnh(AF_INET6); struct radix_node *rn; +#endif KASSERT(m->m_len >= sizeof(*ip6)); @@ -295,31 +411,82 @@ encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir) match = NULL; matchprio = 0; + s = pserialize_read_enter(); +#ifdef USE_RADIX + /* Check whether there's and update in progress. 
*/ + if (encap_head_updating) { + /* + * Update in progress. Pretend there are no tunnels + */ + pserialize_read_exit(s); + return NULL; + } + rn = rnh->rnh_matchaddr((void *)&pack, rnh); if (rn && (rn->rn_flags & RNF_ROOT) == 0) { - match = (struct encaptab *)rn; + struct encaptab *encapp = (struct encaptab *)rn; + + psref_acquire(match_psref, &encapp->psref, + encaptab.elem_class); + match = encapp; matchprio = mask_matchlen(match->srcmask) + - mask_matchlen(match->dstmask); + mask_matchlen(match->dstmask); } +#endif + LIST_FOREACH(ep, &encap_table, chain) { + struct psref elem_psref; + + membar_datadep_consumer(); - LIST_FOREACH(ep, &encaptab, chain) { if (ep->af != AF_INET6) continue; if (ep->proto >= 0 && ep->proto != proto) continue; - if (ep->func) + + psref_acquire(&elem_psref, &ep->psref, + encaptab.elem_class); + + if (ep->func) { + pserialize_read_exit(s); + /* XXXX ep->func is sleepable. */ prio = (*ep->func)(m, off, proto, ep->arg); - else + s = pserialize_read_enter(); + } else { +#ifdef USE_RADIX + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); continue; +#else + prio = mask_match(ep, (struct sockaddr *)&pack.mine, + (struct sockaddr *)&pack.yours); +#endif + } /* see encap4_lookup() for issues here */ - if (prio <= 0) + if (prio <= 0) { + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); continue; + } if (prio > matchprio) { + /* release last matched ep */ + if (match != NULL) + psref_release(match_psref, &match->psref, + encaptab.elem_class); + + psref_copy(match_psref, &elem_psref, + encaptab.elem_class); matchprio = prio; match = ep; } + KASSERTMSG((match == NULL) || psref_held(&match->psref, + encaptab.elem_class), + "current match = %p, but not hold its psref", match); + + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); } + pserialize_read_exit(s); return match; } @@ -330,16 +497,23 @@ encap6_input(struct mbuf **mp, int *offp, int proto) struct mbuf *m = *mp; const struct encapsw *esw; struct encaptab *match; + struct psref match_psref; - match = encap6_lookup(m, *offp, proto, INBOUND); + match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref); if (match) { /* found a match */ esw = match->esw; if (esw && esw->encapsw6.pr_input) { + int ret; encap_fillarg(m, match); - return (*esw->encapsw6.pr_input)(mp, offp, proto); + ret = (*esw->encapsw6.pr_input)(mp, offp, proto); + psref_release(&match_psref, &match->psref, + encaptab.elem_class); + return ret; } else { + psref_release(&match_psref, &match->psref, + encaptab.elem_class); m_freem(m); return IPPROTO_DONE; } @@ -350,39 +524,111 @@ encap6_input(struct mbuf **mp, int *offp, int proto) } #endif +/* + * XXX + * The encaptab list and the rnh radix tree must be manipulated atomically. + */ static int encap_add(struct encaptab *ep) { +#ifdef USE_RADIX struct radix_node_head *rnh = encap_rnh(ep->af); - int error = 0; +#endif + + KASSERT(encap_lock_held()); - LIST_INSERT_HEAD(&encaptab, ep, chain); +#ifdef USE_RADIX if (!ep->func && rnh) { + /* Disable access to the radix tree for reader. */ + encap_head_updating = true; + /* Wait for all readers to drain. */ + pserialize_perform(encaptab.psz); + if (!rnh->rnh_addaddr((void *)ep->addrpack, (void *)ep->maskpack, rnh, ep->nodes)) { - error = EEXIST; - goto fail; + encap_head_updating = false; + return EEXIST; } + + /* + * The ep added to the radix tree must be skipped while + * encap[46]_lookup walks encaptab list. In other words, + * encap_add() does not need to care whether the ep has + * been added encaptab list or not yet. 
+ * So, we can re-enable access to the radix tree for now. + */ + encap_head_updating = false; } - return error; +#endif - fail: - LIST_REMOVE(ep, chain); - return error; +/* + * XXX + * need memory barrier to use queue(3) with pserialize(9). + * see https://mail-index.netbsd.org/tech-kern/2014/11/21/msg018055.html + */ +#define LIST_INSERT_HEAD_PSZ(_head, _elm, _field) do { \ + (_elm)->_field.le_next = (_head)->lh_first; \ + (_elm)->_field.le_prev = &(_head)->lh_first; \ + membar_producer(); \ + if ((_elm)->_field.le_next != LIST_END(_head)) \ + (_head)->lh_first->_field.le_prev = &(_elm)->_field.le_next; \ + (_head)->lh_first = (_elm); \ + } while (/*CONSTCOND*/0) + + LIST_INSERT_HEAD_PSZ(&encap_table, ep, chain); + +#undef LIST_INSERT_HEAD_PSZ + + return 0; } +/* + * XXX + * The encaptab list and the rnh radix tree must be manipulated atomically. + */ static int encap_remove(struct encaptab *ep) { +#ifdef USE_RADIX struct radix_node_head *rnh = encap_rnh(ep->af); +#endif int error = 0; - LIST_REMOVE(ep, chain); + KASSERT(encap_lock_held()); + +#ifdef USE_RADIX if (!ep->func && rnh) { + /* Disable access to the radix tree for reader. */ + encap_head_updating = true; + /* Wait for all readers to drain. */ + pserialize_perform(encaptab.psz); + if (!rnh->rnh_deladdr((void *)ep->addrpack, (void *)ep->maskpack, rnh)) error = ESRCH; + + /* + * The ep added to the radix tree must be skipped while + * encap[46]_lookup walks encaptab list. In other words, + * encap_add() does not need to care whether the ep has + * been added encaptab list or not yet. + * So, we can re-enable access to the radix tree for now. + */ + encap_head_updating = false; } +#endif + +/* + * XXX + * need memory barrier to use queue(3) with pserialize(9). + * see https://mail-index.netbsd.org/tech-kern/2014/11/21/msg018055.html + */ +#define LIST_REMOVE_PSZ LIST_REMOVE + + LIST_REMOVE(ep, chain); + +#undef LIST_REMOVE_PSZ + return error; } @@ -434,7 +680,7 @@ encap_attach(int af, int proto, { struct encaptab *ep; int error; - int s; + int s, pss; size_t l; struct ip_pack4 *pack4; #ifdef INET6 @@ -448,7 +694,10 @@ encap_attach(int af, int proto, goto fail; /* check if anyone have already attached with exactly same config */ - LIST_FOREACH(ep, &encaptab, chain) { + pss = pserialize_read_enter(); + LIST_FOREACH(ep, &encap_table, chain) { + membar_datadep_consumer(); + if (ep->af != af) continue; if (ep->proto != proto) @@ -471,8 +720,10 @@ encap_attach(int af, int proto, continue; error = EEXIST; + pserialize_read_exit(pss); goto fail; } + pserialize_read_exit(pss); switch (af) { case AF_INET: @@ -535,6 +786,7 @@ encap_attach(int af, int proto, memcpy(ep->dstmask, dm, dp->sa_len); ep->esw = esw; ep->arg = arg; + psref_target_init(&ep->psref, encaptab.elem_class); error = encap_add(ep); if (error) @@ -614,6 +866,7 @@ encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) int off; struct ip6ctlparam *ip6cp = NULL; int nxt; + int s; struct encaptab *ep; const struct encapsw *esw; @@ -641,13 +894,17 @@ encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) if (ip6 && cmd == PRC_MSGSIZE) { int valid = 0; struct encaptab *match; + struct psref elem_psref; /* * Check to see if we have a valid encap configuration. 
*/ - match = encap6_lookup(m, off, nxt, OUTBOUND); + match = encap6_lookup(m, off, nxt, OUTBOUND, + &elem_psref); if (match) valid++; + psref_release(&elem_psref, &match->psref, + encaptab.elem_class); /* * Depending on the value of "valid" and routing table @@ -665,7 +922,13 @@ encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) } /* inform all listeners */ - LIST_FOREACH(ep, &encaptab, chain) { + + s = pserialize_read_enter(); + LIST_FOREACH(ep, &encap_table, chain) { + struct psref elem_psref; + + membar_datadep_consumer(); + if (ep->af != AF_INET6) continue; if (ep->proto >= 0 && ep->proto != nxt) @@ -674,11 +937,16 @@ encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) /* should optimize by looking at address pairs */ /* XXX need to pass ep->arg or ep itself to listeners */ + psref_acquire(&elem_psref, &ep->psref, + encaptab.elem_class); esw = ep->esw; if (esw && esw->encapsw6.pr_ctlinput) { (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg); } + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); } + pserialize_read_exit(s); rip6_ctlinput(cmd, sa, d0); return NULL; @@ -692,11 +960,20 @@ encap_detach(const struct encaptab *cookie) struct encaptab *p, *np; int error; - LIST_FOREACH_SAFE(p, &encaptab, chain, np) { + KASSERT(encap_lock_held()); + + LIST_FOREACH_SAFE(p, &encap_table, chain, np) { + membar_datadep_consumer(); + if (p == ep) { error = encap_remove(p); if (error) return error; + + pserialize_perform(encaptab.psz); + + psref_target_destroy(&p->psref, + encaptab.elem_class); if (!ep->func) { kmem_free(p->addrpack, ep->addrpack->sa_len); kmem_free(p->maskpack, ep->maskpack->sa_len); @@ -709,6 +986,7 @@ encap_detach(const struct encaptab *cookie) return ENOENT; } +#ifdef USE_RADIX static struct radix_node_head * encap_rnh(int af) { @@ -742,6 +1020,63 @@ mask_matchlen(const struct sockaddr *sa) } return l; } +#endif + +#ifndef USE_RADIX +static int +mask_match(const struct encaptab *ep, + const struct sockaddr *sp, + const struct sockaddr *dp) +{ + struct sockaddr_storage s; + struct sockaddr_storage d; + int i; + const u_int8_t *p, *q; + u_int8_t *r; + int matchlen; + + KASSERTMSG(ep->func == NULL, "wrong encaptab passed to mask_match"); + + if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) + return 0; + if (sp->sa_family != ep->af || dp->sa_family != ep->af) + return 0; + if (sp->sa_len != ep->src->sa_len || dp->sa_len != ep->dst->sa_len) + return 0; + + matchlen = 0; + + p = (const u_int8_t *)sp; + q = (const u_int8_t *)ep->srcmask; + r = (u_int8_t *)&s; + for (i = 0 ; i < sp->sa_len; i++) { + r[i] = p[i] & q[i]; + /* XXX estimate */ + matchlen += (q[i] ? 8 : 0); + } + + p = (const u_int8_t *)dp; + q = (const u_int8_t *)ep->dstmask; + r = (u_int8_t *)&d; + for (i = 0 ; i < dp->sa_len; i++) { + r[i] = p[i] & q[i]; + /* XXX rough estimate */ + matchlen += (q[i] ? 
8 : 0); + } + + /* need to overwrite len/family portion as we don't compare them */ + s.ss_len = sp->sa_len; + s.ss_family = sp->sa_family; + d.ss_len = dp->sa_len; + d.ss_family = dp->sa_family; + + if (memcmp(&s, ep->src, ep->src->sa_len) == 0 && + memcmp(&d, ep->dst, ep->dst->sa_len) == 0) { + return matchlen; + } else + return 0; +} +#endif static void encap_fillarg(struct mbuf *m, const struct encaptab *ep) @@ -769,3 +1104,41 @@ encap_getarg(struct mbuf *m) } return p; } + +int +encap_lock_enter(void) +{ + int error; + + mutex_enter(&encap_whole.lock); + while (encap_whole.busy != NULL) { + error = cv_wait_sig(&encap_whole.cv, &encap_whole.lock); + if (error) { + mutex_exit(&encap_whole.lock); + return error; + } + } + KASSERT(encap_whole.busy == NULL); + encap_whole.busy = curlwp; + mutex_exit(&encap_whole.lock); + + return 0; +} + +void +encap_lock_exit(void) +{ + + mutex_enter(&encap_whole.lock); + KASSERT(encap_whole.busy == curlwp); + encap_whole.busy = NULL; + cv_broadcast(&encap_whole.cv); + mutex_exit(&encap_whole.lock); +} + +bool +encap_lock_held(void) +{ + + return (encap_whole.busy == curlwp); +} diff --git a/sys/netinet/ip_encap.h b/sys/netinet/ip_encap.h index 13b14b3..1013e17 100644 --- a/sys/netinet/ip_encap.h +++ b/sys/netinet/ip_encap.h @@ -39,6 +39,8 @@ #include #endif +#include + struct encapsw { union { struct encapsw4 { @@ -73,6 +75,7 @@ struct encaptab { int (*func) (struct mbuf *, int, int, void *); const struct encapsw *esw; void *arg; /* passed via PACKET_TAG_ENCAP */ + struct psref_target psref; }; /* to lookup a pair of address using radix tree */ @@ -93,6 +96,8 @@ struct ip_pack6 { struct sockaddr_in6 yours; }; +void encapinit(void); + void encap_init(void); void encap4_input(struct mbuf *, ...); int encap6_input(struct mbuf **, int *, int); @@ -106,6 +111,10 @@ void *encap6_ctlinput(int, const struct sockaddr *, void *); int encap_detach(const struct encaptab *); void *encap_getarg(struct mbuf *); +void encap_lock_enter(void); +void encap_lock_exit(void); +bool encap_lock_held(void); + #define ENCAP_PR_WRAP_CTLINPUT(name) \ static void * \ name##_wrapper(int a, const struct sockaddr *b, void *c, void *d) \ @@ -117,5 +126,4 @@ name##_wrapper(int a, const struct sockaddr *b, void *c, void *d) \ return rv; \ } #endif - #endif /* !_NETINET_IP_ENCAP_H_ */ diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 7108db5..ab533ff 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -833,8 +833,12 @@ add_vif(struct vifctl *vifcp) * this requires both radix tree lookup and then a * function to check, and this is not supported yet. 
*/ + error = encap_lock_enter(); + if (error) + return error; vifp->v_encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4, vif_encapcheck, &vif_encapsw, vifp); + encap_lock_exit(); if (!vifp->v_encap_cookie) return (EINVAL); @@ -930,7 +934,9 @@ reset_vif(struct vif *vifp) callout_stop(&vifp->v_repq_ch); /* detach this vif from decapsulator dispatch table */ + encap_lock_enter(); encap_detach(vifp->v_encap_cookie); + encap_lock_exit(); vifp->v_encap_cookie = NULL; /* diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index ecf6d02..290c8c1 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -215,7 +215,8 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) gifp = (struct ifnet *)encap_getarg(m); - if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) { + if (gifp == NULL || (gifp->if_flags & (IFF_UP|IFF_RUNNING)) + != (IFF_UP|IFF_RUNNING)) { m_freem(m); IP6_STATINC(IP6_STAT_NOGIF); return IPPROTO_DONE; @@ -386,12 +387,22 @@ in6_gif_detach(struct gif_softc *sc) { int error; + error = in6_gif_pause(sc); + + rtcache_free(&sc->gif_ro); + + return error; +} + +int +in6_gif_pause(struct gif_softc *sc) +{ + int error; + error = encap_detach(sc->encap_cookie6); if (error == 0) sc->encap_cookie6 = NULL; - rtcache_free(&sc->gif_ro); - return error; } diff --git a/sys/netinet6/in6_gif.h b/sys/netinet6/in6_gif.h index e59985c..081a2fb 100644 --- a/sys/netinet6/in6_gif.h +++ b/sys/netinet6/in6_gif.h @@ -45,6 +45,7 @@ int gif_encapcheck6(struct mbuf *, int, int, void *); #endif int in6_gif_attach(struct gif_softc *); int in6_gif_detach(struct gif_softc *); +int in6_gif_pause(struct gif_softc *); void *in6_gif_ctlinput(int, const struct sockaddr *, void *, void *); #endif /* !_NETINET6_IN6_GIF_H_ */ diff --git a/sys/netipsec/xform_ipip.c b/sys/netipsec/xform_ipip.c index 21b9fd7..936b35b 100644 --- a/sys/netipsec/xform_ipip.c +++ b/sys/netipsec/xform_ipip.c @@ -721,6 +721,11 @@ ipe4_attach(void) xform_register(&ipe4_xformsw); /* attach to encapsulation framework */ /* XXX save return cookie for detach on module remove */ + + encapinit(); + /* This function is called before ifinit(). Who else gets lock? */ + (void)encap_lock_enter(); + /* ipe4_encapsw and ipe4_encapsw must be added atomically */ #ifdef INET (void) encap_attach_func(AF_INET, -1, ipe4_encapcheck, &ipe4_encapsw, NULL); @@ -729,6 +734,7 @@ ipe4_attach(void) (void) encap_attach_func(AF_INET6, -1, ipe4_encapcheck, &ipe4_encapsw6, NULL); #endif + encap_lock_exit(); } #ifdef SYSINIT diff --git a/sys/sys/psref.h b/sys/sys/psref.h new file mode 100644 index 0000000..73652e0 --- /dev/null +++ b/sys/sys/psref.h @@ -0,0 +1,111 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2016 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Taylor R. Campbell. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_PSREF_H +#define _SYS_PSREF_H + +#include +#include + +/* + * PSREF_DEBUG + * + * If nonzero, enable debugging of psrefs. WARNING: This changes + * the ABI by adding extra fields to struct psref_target and + * struct psref, which are exposed to callers and embedded in + * other structures. + */ +#ifdef _KERNEL_OPT +#include "opt_psref.h" +#endif + +struct cpu_info; +struct lwp; + +struct psref; +struct psref_class; +struct psref_target; + +/* + * struct psref_target + * + * Bookkeeping for an object to which users can acquire passive + * references. This is compact so that it can easily be embedded + * into many multitudes of objects, e.g. IP packet flows. + * + * prt_draining is false on initialization, and may be written + * only once, to make it true, when someone has prevented new + * references from being created and wants to drain the target in + * order to destroy it. + */ +struct psref_target { +#ifdef PSREF_DEBUG + struct psref_class *prt_class; +#endif + bool prt_draining; +}; + +/* + * struct psref + * + * Bookkeeping for a single passive reference. There should only + * be a few of these per CPU in the system at once, no matter how + * many targets are stored, so these are a bit larger than struct + * psref_target. The contents of struct psref may be read and + * written only on the local CPU. + */ +struct psref { + LIST_ENTRY(psref) psref_entry; + struct psref_target *psref_target; +#ifdef PSREF_DEBUG + struct lwp *psref_lwp; + struct cpu_info *psref_cpu; +#endif +}; + +struct psref_class * + psref_class_create(const char *, int); +void psref_class_destroy(struct psref_class *); + +void psref_target_init(struct psref_target *, struct psref_class *); +void psref_target_destroy(struct psref_target *, struct psref_class *); + +void psref_acquire(struct psref *, struct psref_target *, + struct psref_class *); +void psref_release(struct psref *, struct psref_target *, + struct psref_class *); +void psref_copy(struct psref *, const struct psref *, + struct psref_class *); + +/* For use only in assertions. */ +bool psref_held(struct psref_target *, struct psref_class *); + +#endif /* _SYS_PSREF_H */
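
Below is a minimal usage sketch, for illustration only and not part of the patch, of how the psref(9) API added in sys/kern/subr_psref.c and sys/sys/psref.h is meant to be combined with pserialize(9). It follows the same reader/writer pattern that encap[46]_lookup() and encap_detach() adopt above. All frob_* names are hypothetical; the psref class and the pserialize handle are assumed to have been created at initialization time with psref_class_create() and pserialize_create().

/*
 * Usage sketch for psref(9) + pserialize(9) -- illustration only, not
 * part of the patch.  The frob_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/pserialize.h>
#include <sys/psref.h>
#include <sys/queue.h>

struct frob {
	LIST_ENTRY(frob)	f_entry;
	int			f_key;
	struct psref_target	f_psref;
};

static LIST_HEAD(, frob) frob_list = LIST_HEAD_INITIALIZER(frob_list);
static pserialize_t frob_psz;		/* from pserialize_create() at init */
static struct psref_class *frob_class;	/* from psref_class_create("frob", IPL_SOFTNET) */

/*
 * Reader: find an entry and pin it with a passive reference.  The caller
 * must be bound to its CPU (softint or LP_BOUND lwp) or have kpreemption
 * disabled, and must psref_release(psref, &f->f_psref, frob_class) on the
 * same CPU/LWP when done.
 */
static struct frob *
frob_lookup(int key, struct psref *psref)
{
	struct frob *f;
	int s;

	s = pserialize_read_enter();
	LIST_FOREACH(f, &frob_list, f_entry) {
		/* pairs with membar_producer() in the publishing path */
		membar_datadep_consumer();
		if (f->f_key == key) {
			/* Pin the object; cheap and CPU-local. */
			psref_acquire(psref, &f->f_psref, frob_class);
			break;
		}
	}
	pserialize_read_exit(s);

	return f;	/* NULL if not found */
}

/*
 * Writer: unpublish an entry, wait for readers and reference holders to
 * drain, then free it.  May sleep, so it must run in thread context, and
 * writers are assumed to be serialized against each other (cf. the encap
 * lock above).
 */
static void
frob_destroy(struct frob *f)
{
	LIST_REMOVE(f, f_entry);	/* new lookups can no longer find it */
	pserialize_perform(frob_psz);	/* wait out current list walkers */
	psref_target_destroy(&f->f_psref, frob_class);	/* wait for held psrefs */
	kmem_free(f, sizeof(*f));
}

The point of the split is that the common path (lookup plus acquire/release) stays CPU-local with no interprocessor synchronization, while the rare path (destruction) pays for the xcall inside psref_target_destroy().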
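
A second sketch, also illustrative only, condenses the writer-side discipline this patch imposes on the encap table: any path that attaches or detaches an encap entry must bracket the call with encap_lock_enter()/encap_lock_exit(), the single sleepable lock asserted by KASSERT(encap_lock_held()) in encap_add(), encap_remove() and encap_detach(). The my_* names are hypothetical.

/*
 * Writer-side sketch for the encap "whole" lock -- illustration only,
 * not part of the patch.  my_softc, my_encapcheck and my_encapsw are
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_encap.h>

struct my_softc {
	const struct encaptab	*sc_cookie;
};

static int
my_encapcheck(struct mbuf *m, int off, int proto, void *arg)
{

	return 32;	/* claim a match; real code validates the addresses */
}

static const struct encapsw my_encapsw;	/* fill in pr_input/pr_ctlinput */

static int
my_tunnel_attach(struct my_softc *sc)
{
	int error;

	error = encap_lock_enter();	/* sleepable and interruptible */
	if (error)
		return error;		/* e.g. EINTR */

	sc->sc_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
	    my_encapcheck, &my_encapsw, sc);

	encap_lock_exit();

	return (sc->sc_cookie == NULL) ? EINVAL : 0;
}

static int
my_tunnel_detach(struct my_softc *sc)
{
	int error;

	error = encap_lock_enter();
	if (error)
		return error;

	error = encap_detach(sc->sc_cookie);
	if (error == 0)
		sc->sc_cookie = NULL;

	encap_lock_exit();

	return error;
}

Because encap_lock_enter() sleeps with cv_wait_sig(), callers must be prepared for it to fail (for example when interrupted by a signal), which is why gif_set_tunnel() and add_vif() above check its return value before touching the encap table.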