From c545d345faa558f46498ebe47afd95664bd81af9 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Sat, 18 Dec 2021 15:49:52 +0000
Subject: [PATCH] uvm: Stop-gap fix for COW TLB memory ordering bug.

	struct { long x; char pad[512]; int y; } *f = mmap(...);

	/* thread A */
	for (;;)
		f->y = 1;		/* trigger cow */

	/* thread B */
	for (long x = 0;; x++) {
		lock();
		f->x = x;		/* write to new page after cow tlb flush */
		unlock();
	}

	/* thread C */
	for (;;) {
		lock();
		long x0 = f->x;		/* read from old page before tlb flush */
		/* XXX tlb flush may happen asynchronously here */
		long x1 = f->x;		/* read from new page after tlb flush */
		unlock();
		assert(x0 == x1);
	}

	/* main thread */
	for (;;) {
		pid_t pid = fork();
		if (pid == 0)
			_exit(0);
		waitpid(pid, NULL, 0);
	}

When thread A triggers copy-on-write, the kernel allocates a new
backing page for f, copies the old page content to the new page, and
then updates the page tables to point at the new page with write
permission.

However, the new page may be exposed writably to thread B while
thread C still has TLB entries pointing at the old page.  So the
following sequence of events may happen:

	thread B			thread C
	--------			--------
	*interrupt*
	tlb flush
	lock();
	f->x = x;	/* write to new page */
	unlock();
					lock();
					x0 = f->x;	/* read from old page */
					*interrupt*
					tlb flush
					x1 = f->x;	/* read from new page */
					unlock();
					assert(x0 == x1);	*crash*

This patch addresses the problem by making the old page nonwritable
before copying its content to the new page and publishing the new
page; that way, in this scenario, threads B and C will both fault and
wait for the cow handling to complete.

Another approach would be to make the new page nonwritable (but still
readable) on all CPUs first, and then update it to be writable.  That
change is a little more involved.
---
 sys/uvm/uvm_fault.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index 7e5133f124e9..97a1986afb87 100644
--- a/sys/uvm/uvm_fault.c
+++ b/sys/uvm/uvm_fault.c
@@ -635,6 +635,7 @@ uvmfault_promote(struct uvm_faultinfo *ufi,
 
 	/* copy page [pg now dirty] */
 	if (opg) {
+		pmap_page_protect(opg, VM_PROT_NONE);
 		uvm_pagecopy(opg, pg);
 	}
 	KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);