From fd1ce1b6fcf5936a6f995941e0cd49adf8221360 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Mon, 18 Apr 2022 18:51:32 +0000
Subject: [PATCH 1/4] mmap(2): If we fail with a hint, try again without it.

`Hint' here means nonzero addr, but no MAP_FIXED or MAP_TRYFIXED.

This is suboptimal -- we could teach uvm_mmap to do a fancier search
using the address as a hint.  But this should do for now.

Candidate fix for PR kern/55533.
---
 sys/uvm/uvm_mmap.c | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index 3e4a66ce9f41..a0f202668fef 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -277,7 +277,8 @@ sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
 	vsize_t size, pageoff, newsize;
 	vm_prot_t prot, maxprot, extraprot;
 	int flags, fd, advice;
-	vaddr_t defaddr;
+	vaddr_t defaddr = 0;	/* XXXGCC */
+	bool addrhint = false;
 	struct file *fp = NULL;
 	struct uvm_object *uobj;
 	int error;
@@ -349,6 +350,12 @@ sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
 			addr = MAX(addr, defaddr);
 		else
 			addr = MIN(addr, defaddr);
+
+		/*
+		 * If addr is nonzero and not the default, then the
+		 * address is a hint.
+		 */
+		addrhint = (addr != 0 && addr != defaddr);
 	}
 
 	/*
@@ -399,11 +406,30 @@ sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
 	pax_aslr_mmap(l, &addr, orig_addr, flags);
 
 	/*
-	 * now let kernel internal function uvm_mmap do the work.
+	 * Now let kernel internal function uvm_mmap do the work.
+	 *
+	 * If the user provided a hint, take a reference to uobj in
+	 * case the first attempt to satisfy the hint fails, so we can
+	 * try again with the default address.
 	 */
-
+	if (addrhint) {
+		if (uobj)
+			(*uobj->pgops->pgo_reference)(uobj);
+	}
 	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
 	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
+	if (addrhint) {
+		if (error) {
+			addr = defaddr;
+			pax_aslr_mmap(l, &addr, orig_addr, flags);
+			error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size,
+			    prot, maxprot, flags, advice, uobj, pos,
+			    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
+		} else if (uobj) {
+			/* Release the exta reference we took.  */
+			(*uobj->pgops->pgo_detach)(uobj);
+		}
+	}
 
 	/* remember to add offset */
 	*retval = (register_t)(addr + pageoff);
@@ -818,9 +844,12 @@ sys_munlockall(struct lwp *l, const void *v, register_t *retval)
  * - used by sys_mmap and various framebuffers
  * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
  * - caller must page-align the file offset
+ *
+ * XXX This appears to leak the uobj in various error branches?  Need
+ * to clean up the contract around uobj reference.
  */
 
-int
+static int
 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
     vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
     voff_t foff, vsize_t locklimit)

From 9f41717cc97f092b5e5844fd018ef9e18fb046fd Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Wed, 1 Jun 2022 19:09:22 +0000
Subject: [PATCH 2/4] uvm(9): Fix 19-year-old bug in assertion about mmap hint.

Previously this would _first_ remember the original hint, and _then_
clamp the hint to the VM map's range:

	orig_hint = hint;
	if (hint < vm_map_min(map)) {	/* check ranges ... */
		if (flags & UVM_FLAG_FIXED) {
			UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
			return (NULL);
		}
		hint = vm_map_min(map);
	...
	KASSERTMSG(!topdown || hint <= orig_hint, "hint: %#jx, orig_hint: %#jx",
	    (uintmax_t)hint, (uintmax_t)orig_hint);

Even if nothing else happens in the ellipsis, taking the branch
guarantees the assertion will fail in the topdown case.
---
 sys/uvm/uvm_map.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index 9faa5a60c2c6..75a4c5d0ba1e 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1813,12 +1813,15 @@ uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
 	uvm_map_check(map, "map_findspace entry");
 
 	/*
-	 * remember the original hint.  if we are aligning, then we
-	 * may have to try again with no alignment constraint if
-	 * we fail the first time.
+	 * Remember the original hint, clamped to the min/max address.
+	 * If we are aligning, then we may have to try again with no
+	 * alignment constraint if we fail the first time.  We use the
+	 * original hint to verify later that the search has been
+	 * monotonic -- that is, decreasing or increasing, according to
+	 * topdown or !topdown respectively.  But the clamping is not
+	 * monotonic.
 	 */
 
-	orig_hint = hint;
 	if (hint < vm_map_min(map)) {	/* check ranges ... */
 		if (flags & UVM_FLAG_FIXED) {
 			UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
@@ -1831,6 +1834,7 @@ uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
 		    hint, vm_map_min(map), vm_map_max(map), 0);
 		return (NULL);
 	}
+	orig_hint = hint;
 
 	UVMHIST_LOG(maphist,"<- VA %#jx vs range [%#jx->%#jx]",
 	    hint, vm_map_min(map), vm_map_max(map), 0);

From 0ceda4e9e6b75eb18063b82e1ab9ad8c575617ac Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Wed, 1 Jun 2022 22:17:51 +0000
Subject: [PATCH 3/4] uvm(9): Fix mmap optimization for topdown case.

PR kern/51393
---
 sys/uvm/uvm_map.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index 75a4c5d0ba1e..24f4eba8a9ff 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1874,7 +1874,8 @@ uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
 	 * it, there would be four cases).
 	 */
 
-	if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) {
+	if ((flags & UVM_FLAG_FIXED) == 0 &&
+	    hint == (topdown ? vm_map_max(map) : vm_map_min(map))) {
 		entry = map->first_free;
 	} else {
 		if (uvm_map_lookup_entry(map, hint, &entry)) {

From 4e181c86873b23884100a354fa9fede744736671 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Wed, 1 Jun 2022 22:54:39 +0000
Subject: [PATCH 4/4] tests/lib/libc: Test mmap(2) with bad hints.

---
 tests/lib/libc/sys/t_mmap.c | 66 +++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tests/lib/libc/sys/t_mmap.c b/tests/lib/libc/sys/t_mmap.c
index 8ede0f788c89..28125991a953 100644
--- a/tests/lib/libc/sys/t_mmap.c
+++ b/tests/lib/libc/sys/t_mmap.c
@@ -619,6 +619,71 @@ ATF_TC_BODY(mmap_va0, tc)
 	map_check(map, val);
 }
 
+static void
+test_mmap_hint(uintptr_t hintaddr)
+{
+	void *hint = (void *)hintaddr;
+	void *map1 = MAP_FAILED, *map2 = MAP_FAILED, *map3 = MAP_FAILED;
+
+	map1 = mmap(hint, page, PROT_READ|PROT_WRITE, MAP_ANON, -1, 0);
+	if (map1 == MAP_FAILED) {
+		atf_tc_fail_nonfatal("mmap1 hint=%p: errno=%d", hint, errno);
+		goto out;
+	}
+	map2 = mmap(map1, page, PROT_READ|PROT_WRITE, MAP_ANON, -1, 0);
+	if (map2 == MAP_FAILED) {
+		atf_tc_fail_nonfatal("mmap2 hint=%p map1=%p failed: errno=%d",
+		    hint, map1, errno);
+		goto out;
+	}
+	map3 = mmap(hint, page, PROT_READ|PROT_WRITE, MAP_ANON, -1, 0);
+	if (map3 == MAP_FAILED) {
+		atf_tc_fail_nonfatal("mmap3 hint=%p map1=%p failed: errno=%d",
+		    hint, map1, errno);
+		goto out;
+	}
+out:
+	if (map3 != MAP_FAILED) {
+		ATF_CHECK_MSG(munmap(map3, page) == 0, "munmap3 %p hint=%p",
+		    map3, hint);
+	}
+	if (map2 != MAP_FAILED) {
+		ATF_CHECK_MSG(munmap(map2, page) == 0, "munmap2 %p hint=%p",
+		    map2, hint);
+	}
+	if (map1 != MAP_FAILED) {
+		ATF_CHECK_MSG(munmap(map1, page) == 0, "munmap1 %p hint=%p",
+		    map1, hint);
+	}
+}
+
+ATF_TC(mmap_hint);
+ATF_TC_HEAD(mmap_hint, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test mmap with hints");
+}
+ATF_TC_BODY(mmap_hint, tc)
+{
+	static const int minaddress_mib[] = { CTL_VM, VM_MINADDRESS };
+	static const int maxaddress_mib[] = { CTL_VM, VM_MAXADDRESS };
+	long minaddress, maxaddress;
+	size_t minaddresssz = sizeof(minaddress);
+	size_t maxaddresssz = sizeof(maxaddress);
+
+	ATF_REQUIRE_MSG(sysctl(minaddress_mib, __arraycount(minaddress_mib),
+		&minaddress, &minaddresssz, NULL, 0) == 0,
+	    "sysctl vm.minaddress: errno=%d", errno);
+	ATF_REQUIRE_MSG(sysctl(maxaddress_mib, __arraycount(maxaddress_mib),
+		&maxaddress, &maxaddresssz, NULL, 0) == 0,
+	    "sysctl vm.maxaddress: errno=%d", errno);
+
+	test_mmap_hint(0);
+	test_mmap_hint(-1);
+	test_mmap_hint(page);
+	test_mmap_hint(minaddress);
+	test_mmap_hint(maxaddress);
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 	page = sysconf(_SC_PAGESIZE);
@@ -634,6 +699,7 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, mmap_truncate);
 	ATF_TP_ADD_TC(tp, mmap_truncate_signal);
 	ATF_TP_ADD_TC(tp, mmap_va0);
+	ATF_TP_ADD_TC(tp, mmap_hint);
 
 	return atf_no_error();
 }