Index: arch/x86/include/cpu.h
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/include/cpu.h,v
retrieving revision 1.90
diff -u -p -r1.90 cpu.h
--- arch/x86/include/cpu.h	30 Mar 2018 19:51:53 -0000	1.90
+++ arch/x86/include/cpu.h	31 Mar 2018 20:08:18 -0000
@@ -201,6 +201,25 @@ struct cpu_info {
 	vaddr_t ci_svs_utls;
 #endif
 
+#if defined(__HAVE_PCID)
+	/*
+	 * ASID (or PCID, as Intel calls it) related structures.
+	 *
+	 * A context is simply a small number that distinguishes the
+	 * mappings of the same virtual address in different pmaps.
+	 *
+	 * There is no lock for allocation, since no mutual exclusion
+	 * is necessary.
+	 */
+	tlb_asid_t ci_pmap_next_ctx;	/* Next available PCID */
+	tlb_asid_t ci_current_kctx;	/* Current kernel PCID */
+	uint32_t ci_pmap_ctxgen;	/* Current generation number */
+#ifdef SVS
+# define ci_current_uctx	(ci_current_kctx | PCID_SVS_USERSPACE)
+#endif
+
+#endif /* __HAVE_PCID */
+
 #if defined(XEN) && (defined(PAE) || defined(__x86_64__))
 	/* Currently active user PGD (can't use rcr3() with Xen) */
 	pd_entry_t *	ci_kpm_pdir;	/* per-cpu PMD (va) */
@@ -343,6 +362,7 @@ void cpu_boot_secondary_processors(void)
 void cpu_init_idle_lwps(void);
 void cpu_init_msrs(struct cpu_info *, bool);
 void cpu_load_pmap(struct pmap *, struct pmap *);
+void cpu_pmap_init(struct cpu_info *);
 
 void cpu_broadcast_halt(void);
 void cpu_kick(struct cpu_info *);
@@ -417,6 +437,16 @@ extern int x86_fpu_save;
 extern unsigned int x86_fpu_save_size;
 extern uint64_t x86_xsave_features;
 
+#ifdef __HAVE_PCID
+#define PCID_NUM		0x1000UL /* Number of supported PCIDs */
+#define PCID_MASK		(PCID_NUM - 1)
+#define PCID_SVS_USERSPACE	0x0800UL /* Userspace PCID bit */
+
+#define PCID_NO_TLB_FLUSH	(1UL << 63) /* No TLB flush on %cr3 change */
+
+extern bool x86_use_pcid;
+#endif /* __HAVE_PCID */
+
 extern void (*x86_cpu_idle)(void);
 #define cpu_idle() (*x86_cpu_idle)()
@@ -537,8 +567,9 @@ void x86_bus_space_mallocok(void);
  */
 #define CPU_FPU_SAVE_SIZE	16	/* int: FPU Instruction layout size */
 #define CPU_XSAVE_FEATURES	17	/* quad: XSAVE features */
+#define CPU_PCID		18	/* int: OS/CPU supports PCID+INVPCID */
 
-#define CPU_MAXID		18	/* number of valid machdep ids */
+#define CPU_MAXID		19	/* number of valid machdep ids */
 
 /*
  * Structure for CPU_DISKINFO sysctl call.
Index: arch/x86/include/pmap.h
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/include/pmap.h,v
retrieving revision 1.76
diff -u -p -r1.76 pmap.h
--- arch/x86/include/pmap.h	4 Mar 2018 10:13:08 -0000	1.76
+++ arch/x86/include/pmap.h	31 Mar 2018 20:08:18 -0000
@@ -235,6 +235,31 @@ struct pmap {
 					ptp mapped */
 	uint64_t pm_ncsw;		/* for assertions */
 	struct vm_page *pm_gc_ptp;	/* pages from pmap g/c */
+
+#ifdef __HAVE_PCID
+	/*
+	 * We record the context used on each CPU here. If the context
+	 * is actually present in the TLB, it will be the plain context
+	 * number. The kernel pmap is allocated without space for this
+	 * array, so the kernel pmap must never use it.
+	 *
+	 * If this pmap has no context allocated on that CPU, the entry
+	 * will be 0.
+	 */
+	struct {
+		tlb_asid_t pc_ctx;	/* Current context per CPU */
+		uint32_t pc_ctxgen;	/* Context generation per CPU */
+	} pm_ctx[];
+	/* Variable length */
+
+/* Compute the sizeof of a pmap structure. */
+#define PMAP_SIZEOF(x)	(ALIGN(offsetof(struct pmap, pm_ctx[(x)])))
+
+#else /* !__HAVE_PCID */
+
+#define PMAP_SIZEOF(x)	sizeof(struct pmap)
+
+#endif /* __HAVE_PCID */
 };
 
 /* macro to access pm_pdirpa slots */
@@ -315,6 +340,7 @@ void pmap_remove_all(struct pmap *);
 void pmap_ldt_cleanup(struct lwp *);
 void pmap_ldt_sync(struct pmap *);
 void pmap_kremove_local(vaddr_t, vsize_t);
+void pmap_update_pg_shootdown(vaddr_t, struct pmap *);
 
 void pmap_emap_enter(vaddr_t, paddr_t, vm_prot_t);
 void pmap_emap_remove(vaddr_t, vsize_t);
@@ -380,21 +406,39 @@ bool pmap_pageidlezero(paddr_t);
 /*
  * inline functions
  */
-__inline static bool __unused
-pmap_pdes_valid(vaddr_t va, pd_entry_t * const *pdes, pd_entry_t *lastpde)
-{
-	return pmap_pdes_invalid(va, pdes, lastpde) == 0;
-}
-
+#ifdef __HAVE_PCID
 /*
- * pmap_update_pg: flush one page from the TLB (or flush the whole thing
- * if hardware doesn't support one-page flushing)
+ * PCID support functions.
  */
+#define INVPCID_ADDR		0
+#define INVPCID_CTX		1
+#define INVPCID_CTXGLOB		2
+#define INVPCID_ALLCTX		3
+
+struct invpcid_descr {
+	uint64_t pcid:12 __packed;
+	uint64_t pad:52 __packed;
+	uint64_t addr;
+} __packed;
 
-__inline static void __unused
-pmap_update_pg(vaddr_t va)
+static __inline void __unused
+invpcid(int type, tlb_asid_t asid, vaddr_t addr)
+{
+	struct invpcid_descr d;
+
+	memset(&d, 0, sizeof(d));
+	d.pcid = asid;
+	d.addr = addr;
+
+	__asm __volatile("invpcid (%0),%1"
+	    : : "r" (&d), "r" ((u_long)type) : "memory");
+}
+#endif /* __HAVE_PCID */
+
+__inline static bool __unused
+pmap_pdes_valid(vaddr_t va, pd_entry_t * const *pdes, pd_entry_t *lastpde)
 {
-	invlpg(va);
+	return pmap_pdes_invalid(va, pdes, lastpde) == 0;
 }
 
 /*
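For reference, the four INVPCID_* values above match the descriptor types in
the Intel SDM (Vol. 3, "INVPCID—Invalidate Process-Context Identifier"):
individual address (0), single context (1), all contexts including globals
(2), all contexts excluding globals (3). A minimal usage sketch of the
wrapper, assuming the pmap.h definitions above are in scope and "asid" is a
valid allocated PCID (illustrative helpers, not part of the patch):

	/* Drop the translation of a single VA in one address space. */
	static inline void
	flush_one_page(tlb_asid_t asid, vaddr_t va)
	{
		invpcid(INVPCID_ADDR, asid, va);
	}

	/* Drop every non-global translation, for all PCIDs at once. */
	static inline void
	flush_all_nonglobal(void)
	{
		invpcid(INVPCID_ALLCTX, 0, 0);
	}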
Index: arch/x86/x86/cpu.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/cpu.c,v
retrieving revision 1.153
diff -u -p -r1.153 cpu.c
--- arch/x86/x86/cpu.c	28 Mar 2018 14:56:59 -0000	1.153
+++ arch/x86/x86/cpu.c	31 Mar 2018 20:08:18 -0000
@@ -300,10 +300,7 @@ cpu_vm_init(struct cpu_info *ci)
 	aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
 	uvm_page_recolor(ncolors);
 
-	pmap_tlb_cpu_init(ci);
-#ifndef __HAVE_DIRECT_MAP
-	pmap_vpage_cpu_init(ci);
-#endif
+	cpu_pmap_init(ci);
 }
 
 static void
@@ -619,6 +616,12 @@ cpu_init(struct cpu_info *ci)
 	if (cpu_feature[5] & CPUID_SEF_SMAP)
 		cr4 |= CR4_SMAP;
 
+#ifdef __HAVE_PCID
+	/* If PCID is supported, enable it */
+	if (x86_use_pcid)
+		cr4 |= CR4_PCIDE;
+#endif
+
 	if (cr4) {
 		cr4 |= rcr4();
 		lcr4(cr4);
@@ -907,9 +910,9 @@ cpu_hatch(void *v)
 	for (i = 0 ; i < PDP_SIZE; i++) {
 		l3_pd[i] = pmap_kernel()->pm_pdirpa[i] | PG_V;
 	}
-	lcr3(ci->ci_pae_l3_pdirpa);
+	lcr3(ci->ci_pae_l3_pdirpa);	/* hatch, PCID not yet up */
 #else
-	lcr3(pmap_pdirpa(pmap_kernel(), 0));
+	lcr3(pmap_pdirpa(pmap_kernel(), 0)); /* hatch, PCID not yet up */
 #endif
 
 	pcb = lwp_getpcb(curlwp);
@@ -1309,9 +1312,24 @@ cpu_load_pmap(struct pmap *pmap, struct
 	if (interrupts_enabled)
 		x86_enable_intr();
 
-	tlbflush();
+	tlbflush();	/* PCID not in use */
 #else /* PAE */
-	lcr3(pmap_pdirpa(pmap, 0));
+
+	vaddr_t kpcid = 0;
+
+#ifdef __HAVE_PCID
+	if (x86_use_pcid) {
+		const struct cpu_info *ci = curcpu();
+		KASSERTMSG(
+		    (ci->ci_current_kctx != 0 && pmap != pmap_kernel())
+		    || (ci->ci_current_kctx == 0 && pmap == pmap_kernel()),
+		    "pmap %p (kernel %p) ctx %u unexpected",
+		    pmap, pmap_kernel(), ci->ci_current_kctx);
+		kpcid = ci->ci_current_kctx | PCID_NO_TLB_FLUSH;
+	}
+#endif /* __HAVE_PCID */
+
+	lcr3(pmap_pdirpa(pmap, 0) | kpcid);
#endif /* PAE */
 }
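The %cr3 layout that cpu_load_pmap() relies on, spelled out: with CR4.PCIDE
set, bits 0-11 of the value written to %cr3 select the PCID, the upper bits
hold the page-directory physical address, and bit 63 (PCID_NO_TLB_FLUSH)
asks the CPU not to flush the incoming PCID's cached translations on the
write. A sketch of the composition, using the cpu.h constants above
(illustrative kernel-context helper, not part of the patch):

	/* Build a PCID-tagged %cr3 value. */
	static inline uint64_t
	cr3_compose(paddr_t pdirpa, tlb_asid_t pcid, bool noflush)
	{
		uint64_t cr3;

		cr3 = (pdirpa & ~(uint64_t)PCID_MASK) | (pcid & PCID_MASK);
		if (noflush)
			cr3 |= PCID_NO_TLB_FLUSH; /* bit 63: keep cached entries */
		return cr3;
	}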
Index: arch/x86/x86/db_memrw.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/db_memrw.c,v
retrieving revision 1.6
diff -u -p -r1.6 db_memrw.c
--- arch/x86/x86/db_memrw.c	16 Mar 2018 04:48:19 -0000	1.6
+++ arch/x86/x86/db_memrw.c	31 Mar 2018 20:08:18 -0000
@@ -130,6 +130,7 @@ db_write_text(vaddr_t addr, size_t size,
 	pt_entry_t *ppte, pte;
 	size_t limit;
 	char *dst;
+	struct cpu_info *ci = curcpu();
 
 	if (size == 0)
 		return;
@@ -167,7 +168,7 @@ db_write_text(vaddr_t addr, size_t size,
 	 */
 	pmap_pte_clearbits(ppte, PG_KR);
 	pmap_pte_setbits(ppte, PG_KW);
-	pmap_update_pg(addr);
+	pmap_update_pg_shootdown(addr, ci->ci_pmap);
 
 	/*
 	 * MULTIPROCESSOR: no shootdown required as the PTE continues to
@@ -186,7 +187,7 @@ db_write_text(vaddr_t addr, size_t size,
 	 */
 	pmap_pte_clearbits(ppte, PG_KW);
 	pmap_pte_setbits(ppte, PG_KR);
-	pmap_update_pg(addr);
+	pmap_update_pg_shootdown(addr, ci->ci_pmap);
 
 	/*
 	 * MULTIPROCESSOR: no shootdown required as all other CPUs
Index: arch/x86/x86/identcpu.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/identcpu.c,v
retrieving revision 1.71
diff -u -p -r1.71 identcpu.c
--- arch/x86/x86/identcpu.c	30 Mar 2018 19:51:53 -0000	1.71
+++ arch/x86/x86/identcpu.c	31 Mar 2018 20:08:18 -0000
@@ -779,6 +779,18 @@ cpu_probe_fpu(struct cpu_info *ci)
 #endif
 }
 
+#ifdef __HAVE_PCID
+static void
+cpu_probe_pcid(struct cpu_info *ci)
+{
+	/* Enable PCID only if INVPCID is supported as well */
+	if ((ci->ci_feat_val[1] & CPUID2_PCID)
+	    && (ci->ci_feat_val[5] & CPUID_SEF_INVPCID)) {
+		x86_use_pcid = true;
+	}
+}
+#endif /* __HAVE_PCID */
+
 void
 cpu_probe(struct cpu_info *ci)
 {
@@ -896,6 +908,9 @@ cpu_probe(struct cpu_info *ci)
 	cpu_probe_vortex86(ci);
 
 	cpu_probe_fpu(ci);
+#ifdef __HAVE_PCID
+	cpu_probe_pcid(ci);
+#endif
 
 	x86_cpu_topology(ci);
Index: arch/x86/x86/mtrr_i686.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/mtrr_i686.c,v
retrieving revision 1.30
diff -u -p -r1.30 mtrr_i686.c
--- arch/x86/x86/mtrr_i686.c	4 Mar 2018 10:02:10 -0000	1.30
+++ arch/x86/x86/mtrr_i686.c	31 Mar 2018 20:08:18 -0000
@@ -221,7 +221,7 @@ i686_mtrr_reload(int synch)
 	 * to CR3)
 	 */
 
-	tlbflush();
+	tlbflushg();
 
 	/*
 	 * 8. Disable all range registers (by clearing the E flag in
@@ -261,7 +261,7 @@ i686_mtrr_reload(int synch)
 	 */
 	wbinvd();
-	tlbflush();
+	tlbflushg();
 
 	/*
 	 * 12. Enter the normal cache mode to reenable caching (set the CD and
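cpu_probe_pcid() keys off two CPUID bits: PCID is CPUID.01H:ECX[17]
(CPUID2_PCID) and INVPCID is CPUID.(EAX=07H,ECX=0):EBX[10]
(CPUID_SEF_INVPCID). Requiring both keeps the TLB code simple, since without
INVPCID the only way to flush another PCID's entries is to switch to it.
The same check from userland, as a standalone sketch (recent GCC/Clang
cpuid.h; hypothetical program, not part of the patch):

	#include <cpuid.h>
	#include <stdbool.h>
	#include <stdio.h>

	static bool
	cpu_has_pcid_invpcid(void)
	{
		unsigned int a, b, c, d;

		/* CPUID leaf 1, ECX bit 17: PCID */
		if (!__get_cpuid(1, &a, &b, &c, &d) || (c & (1u << 17)) == 0)
			return false;
		/* CPUID leaf 7 subleaf 0, EBX bit 10: INVPCID */
		if (!__get_cpuid_count(7, 0, &a, &b, &c, &d) || (b & (1u << 10)) == 0)
			return false;
		return true;
	}

	int
	main(void)
	{
		printf("PCID+INVPCID: %s\n", cpu_has_pcid_invpcid() ? "yes" : "no");
		return 0;
	}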
Index: arch/x86/x86/pmap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/pmap.c,v
retrieving revision 1.289
diff -u -p -r1.289 pmap.c
--- arch/x86/x86/pmap.c	4 Mar 2018 23:25:35 -0000	1.289
+++ arch/x86/x86/pmap.c	31 Mar 2018 20:08:18 -0000
@@ -188,6 +188,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.2
 #include
 #include
 #include
+#include <sys/once.h>
 #include
 #include
@@ -348,8 +349,13 @@ static vaddr_t pmap_maxkvaddr;
 /*
  * Misc. event counters.
 */
-struct evcnt pmap_iobmp_evcnt;
+#ifndef __x86_64__
+struct evcnt pmap_iobmp_evcnt;	/* only used by i386/locore.S */
+#endif
 struct evcnt pmap_ldt_evcnt;
+#ifdef __HAVE_PCID
+struct evcnt pmap_pcid_wrap_evcnt;
+#endif
 
 /*
  * PAT
@@ -365,6 +371,29 @@ struct evcnt pmap_ldt_evcnt;
 static bool cpu_pat_enabled __read_mostly = false;
 
 /*
+ * PCID
+ */
+#ifndef PCID_MASK
+#define PCID_MASK	0
+#endif
+
+#ifdef __HAVE_PCID
+bool x86_use_pcid __read_mostly = false;
+static tlb_asid_t pmap_num_pcid __read_mostly = PCID_NUM;
+
+#define pmap_ctx(ci, pm)	((pm)->pm_ctx[(ci)->ci_index]).pc_ctx
+#define pmap_ctxgen(ci, pm)	((pm)->pm_ctx[(ci)->ci_index]).pc_ctxgen
+
+/* Initialize the pmap's pm_ctx. It's variable sized, depending on ncpu. */
+#define PMAP_CTX_INIT(pm) \
+	memset((pm)->pm_ctx, 0, ncpu * sizeof((pm)->pm_ctx[0]))
+
+static void ctx_alloc(struct pmap *, struct cpu_info *);
+#endif /* __HAVE_PCID */
+static void tlbflush_allctx(void);
+static void tlbflush_current(struct cpu_info *);
+
+/*
 * Global data structures
 */
@@ -470,6 +499,7 @@ pvhash_remove(struct pv_hash_head *hh, s
 static pt_entry_t protection_codes[8] __read_mostly;
 
 static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */
+static ONCE_DECL(pmap_cache_initialized);
 
 /*
@@ -577,7 +607,7 @@ static bool pmap_remove_pte(struct pmap
 	vaddr_t, struct pv_entry **);
 static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
 	vaddr_t, vaddr_t, struct pv_entry **);
-
+static void pmap_update_pg_current(vaddr_t);
 static paddr_t pmap_get_physpage(void);
 static void pmap_alloc_level(struct pmap *, vaddr_t, long *);
@@ -772,6 +802,10 @@ pmap_map_ptes(struct pmap *pmap, struct
 		ci->ci_tlbstate = TLBSTATE_VALID;
 		kcpuset_atomic_set(pmap->pm_cpus, cid);
 		kcpuset_atomic_set(pmap->pm_kernel_cpus, cid);
+#ifdef __HAVE_PCID
+		if (x86_use_pcid)
+			ctx_alloc(pmap, ci);
+#endif
 		cpu_load_pmap(pmap, curpmap);
 	}
 	pmap->pm_ncsw = l->l_ncsw;
@@ -852,7 +886,7 @@ pmap_exec_account(struct pmap *pm, vaddr
 		return;
 
 	if ((opte ^ npte) & PG_X)
-		pmap_update_pg(va);
+		pmap_update_pg_current(va);
 
 	/*
 	 * Executability was removed on the last executable change.
@@ -1059,7 +1093,7 @@ pmap_emap_sync(bool canload)
 			pmap_load();
 			KASSERT(ci->ci_want_pmapload == 0);
 		} else {
-			tlbflush();
+			tlbflush_current(ci);
 		}
 	}
@@ -1280,6 +1314,10 @@ pmap_bootstrap(vaddr_t kva_start)
 	kpm->pm_ldt_len = 0;
 	kpm->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
 
+#ifdef __HAVE_PCID
+	PMAP_CTX_INIT(kpm);
+#endif /* __HAVE_PCID */
+
 	/*
 	 * the above is just a rough estimate and not critical to the proper
 	 * operation of the system.
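The variable-length pm_ctx[] array is why PMAP_SIZEOF() exists and why the
pool item size can no longer be a plain sizeof(struct pmap). The allocation
pattern, reduced to a standalone illustration (hosted C; all names here are
hypothetical, not the kernel's):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	struct pm_slot {
		uint16_t pc_ctx;	/* PCID on that CPU, 0 = none */
		uint32_t pc_ctxgen;	/* generation that validates pc_ctx */
	};

	struct pm {
		int pm_stuff;		 /* fixed-size part */
		struct pm_slot pm_ctx[]; /* one slot per CPU, sized at runtime */
	};

	/* Same idea as PMAP_SIZEOF(): size up to and including ncpu slots. */
	#define PM_SIZEOF(ncpu)	offsetof(struct pm, pm_ctx[(ncpu)])

	static struct pm *
	pm_alloc(int ncpu)
	{
		struct pm *pm = malloc(PM_SIZEOF(ncpu));

		if (pm != NULL)
			memset(pm->pm_ctx, 0, ncpu * sizeof(pm->pm_ctx[0]));
		return pm;
	}

	int
	main(void)
	{
		struct pm *pm = pm_alloc(8);	/* e.g. 8 CPUs */

		free(pm);
		return 0;
	}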
@@ -1488,7 +1526,7 @@ pmap_init_pcpu(void)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PG_FRAME) | pteflags;
-		pmap_update_pg(tmpva);
+		pmap_update_pg_current(tmpva);
 		memset((void *)tmpva, 0, PAGE_SIZE);
 
 		L4_BASE[L4e_idx+i] = pa | pteflags | PG_U;
@@ -1502,7 +1540,7 @@ pmap_init_pcpu(void)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PG_FRAME) | pteflags;
-		pmap_update_pg(tmpva);
+		pmap_update_pg_current(tmpva);
 		memset((void *)tmpva, 0, PAGE_SIZE);
 
 		L3_BASE[L3e_idx+i] = pa | pteflags | PG_U;
@@ -1517,7 +1555,7 @@ pmap_init_pcpu(void)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PG_FRAME) | pteflags;
-		pmap_update_pg(tmpva);
+		pmap_update_pg_current(tmpva);
 		memset((void *)tmpva, 0, PAGE_SIZE);
 
 		L2_BASE[L2e_idx+i] = pa | pteflags | PG_U;
@@ -1535,11 +1573,11 @@ pmap_init_pcpu(void)
 	}
 
 	*pte = 0;
-	pmap_update_pg(tmpva);
+	pmap_update_pg_current(tmpva);
 
 	pcpuarea = (struct pcpu_area *)startva;
 
-	tlbflush();
+	tlbflush_allctx();	/* one-time */
 }
 #endif
@@ -1602,7 +1640,7 @@ pmap_init_directmap(struct pmap *kpm)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PG_FRAME) | pteflags;
-		pmap_update_pg(tmpva);
+		pmap_update_pg_current(tmpva);
 		memset((void *)tmpva, 0, PAGE_SIZE);
 
 		L4_BASE[L4e_idx+i] = pa | pteflags | PG_U;
@@ -1616,7 +1654,7 @@ pmap_init_directmap(struct pmap *kpm)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PG_FRAME) | pteflags;
-		pmap_update_pg(tmpva);
+		pmap_update_pg_current(tmpva);
 		memset((void *)tmpva, 0, PAGE_SIZE);
 
 		L3_BASE[L3e_idx+i] = pa | pteflags | PG_U;
@@ -1640,14 +1678,14 @@ pmap_init_directmap(struct pmap *kpm)
 	}
 
 	*pte = 0;
-	pmap_update_pg(tmpva);
+	pmap_update_pg_current(tmpva);
 
 	pmap_direct_base = startva;
 	pmap_direct_end = endva;
 	pmap_direct_pdpe = L4e_idx;
 	pmap_direct_npdp = nL4e;
 
-	tlbflush();
+	tlbflush_allctx();	/* one-time */
 }
 #endif /* __HAVE_DIRECT_MAP */
@@ -1769,6 +1807,43 @@ pmap_remap_largepages(void)
 #endif /* !XEN */
 
 /*
+ * Initialize the per-CPU parts for the CPU running this code.
+ */
+void
+cpu_pmap_init(struct cpu_info *ci)
+{
+#ifdef __HAVE_PCID
+	ci->ci_pmap_next_ctx = 1;
+	ci->ci_pmap_ctxgen = 1;
+	ci->ci_current_kctx = 0;
+
+#ifdef SVS
+	if (svs_enabled) {
+		/* Need two PCIDs per pmap for SVS */
+		pmap_num_pcid = PCID_NUM / 2;
+	}
+#endif
+
+#endif /* __HAVE_PCID */
+
+	pmap_tlb_cpu_init(ci);
+#ifndef __HAVE_DIRECT_MAP
+	pmap_vpage_cpu_init(ci);
+#endif
+}
+
+static int
+pmap_cache_init(void)
+{
+	KASSERTMSG(!cold && ncpu > 0, "unexpected cold %d ncpu %d", cold, ncpu);
+
+	pool_cache_bootstrap(&pmap_cache, PMAP_SIZEOF(ncpu), 0, 0, 0,
+	    "pmappl", NULL, IPL_NONE, NULL, NULL, NULL);
+
+	return 0;
+}
+
+/*
  * pmap_init: called from uvm_init, our job is to get the pmap
  * system ready to manage mappings...
 */
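pmap_cache_init() cannot run from pmap_init(), because PMAP_SIZEOF(ncpu) is
only known once all CPUs have attached; deferring it to the first
pmap_create() via once(9) resolves the ordering, which is what the next two
hunks implement. The once(9) pattern in isolation (kernel-context sketch;
the example_* names are placeholders):

	#include <sys/once.h>

	static ONCE_DECL(example_once);

	static int
	example_init(void)
	{
		/* Runs exactly once, on the first call to example_enter(). */
		return 0;
	}

	static void
	example_enter(void)
	{
		RUN_ONCE(&example_once, example_init);
		/* Initialization is guaranteed complete past this point. */
	}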
@@ -1789,8 +1864,7 @@ pmap_init(void)
 	 * initialize caches.
 	 */
 
-	pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0,
-	    "pmappl", NULL, IPL_NONE, NULL, NULL, NULL);
+	/* pmap_cache initialized on first pmap_create() call */
 
 #ifdef XEN
 	/*
@@ -1816,12 +1890,18 @@ pmap_init(void)
 	pmap_tlb_init();
 
 	/* XXX: Since cpu_hatch() is only for secondary CPUs. */
-	pmap_tlb_cpu_init(curcpu());
+	cpu_pmap_init(curcpu());
 
+#ifndef __x86_64__
 	evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC, NULL,
 	    "x86", "io bitmap copy");
+#endif
 	evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC, NULL,
 	    "x86", "ldt sync");
+#ifdef __HAVE_PCID
+	evcnt_attach_dynamic(&pmap_pcid_wrap_evcnt, EVCNT_TYPE_MISC,
+	    NULL, "x86", "pcid wrap");
+#endif
 
 	/*
 	 * done: pmap module is up (and ready for business)
@@ -2459,6 +2539,8 @@ pmap_create(void)
 	struct pmap *pmap;
 	int i;
 
+	RUN_ONCE(&pmap_cache_initialized, pmap_cache_init);
+
 	pmap = pool_cache_get(&pmap_cache, PR_WAITOK);
 
 	/* init uvm_object */
@@ -2487,6 +2569,10 @@ pmap_create(void)
 	pmap->pm_ldt_len = 0;
 	pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
 
+#ifdef __HAVE_PCID
+	PMAP_CTX_INIT(pmap);
+#endif
+
 	/* allocate PDP */
  try_again:
 	pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK);
@@ -2868,6 +2954,9 @@ pmap_activate(struct lwp *l)
 
 	if (pmap == pmap_kernel()) {
 		ci->ci_want_pmapload = 0;
+#ifdef __HAVE_PCID
+		ci->ci_current_kctx = 0;
+#endif
 		return;
 	}
@@ -2883,7 +2972,7 @@ pmap_activate(struct lwp *l)
 	KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]))
 #elif !defined(XEN)
 #define KASSERT_PDIRPA(pmap) \
-	KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3()))
+	KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3() & ~PCID_MASK))
 #else
 #define KASSERT_PDIRPA(pmap) KASSERT(true) /* nothing to do */
 #endif
@@ -2926,9 +3015,14 @@ pmap_reactivate(struct pmap *pmap)
 		kcpuset_atomic_set(pmap->pm_cpus, cid);
 
 		u_int gen = uvm_emap_gen_return();
-		tlbflush();
+		tlbflush_allctx();	/* don't know which are out of sync */
 		uvm_emap_update(gen);
 	}
+
+#ifdef __HAVE_PCID
+	if (x86_use_pcid)
+		ctx_alloc(pmap, ci);
+#endif
 }
@@ -3008,6 +3102,11 @@ pmap_load(void)
 	kcpuset_atomic_set(pmap->pm_kernel_cpus, cid);
 	ci->ci_pmap = pmap;
 
+#ifdef __HAVE_PCID
+	if (x86_use_pcid)
+		ctx_alloc(pmap, ci);
+#endif
+
 	/*
 	 * update tss. now that we have registered for invalidations
 	 * from other CPUs, we're good to load the page tables.
@@ -3046,6 +3145,15 @@ pmap_load(void)
 	kpreempt_enable();
 }
 
+void
+pmap_print_cr3(void);
+
+void
+pmap_print_cr3(void)
+{
+	printf("cr3: %lx pcid: %lx\n", rcr3() & ~PCID_MASK, rcr3() & PCID_MASK);
+}
+
 /*
 * pmap_deactivate: deactivate a process' pmap.
 *
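Note the KASSERT_PDIRPA change above: once %cr3 carries a PCID in its low 12
bits, rcr3() no longer equals the page-directory physical address, so every
comparison has to mask first. In sketch form (kernel-context helpers using
the cpu.h constants; bit 63 is never set in a value read back from %cr3):

	/* Split a %cr3 value read with rcr3() into its two fields. */
	static inline paddr_t
	cr3_to_pdirpa(uint64_t cr3)
	{
		return cr3 & ~(uint64_t)PCID_MASK;	/* page-directory PA */
	}

	static inline tlb_asid_t
	cr3_to_pcid(uint64_t cr3)
	{
		return cr3 & PCID_MASK;			/* current PCID */
	}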
@@ -3285,7 +3393,7 @@ pmap_zero_page(paddr_t pa)
 	pmap_pte_set(zpte, pmap_pa2pte(pa) | pteflags);
 	pmap_pte_flush();
-	pmap_update_pg(zerova);		/* flush TLB */
+	pmap_update_pg_current(zerova);	/* flush TLB */
 
 	memset((void *)zerova, 0, PAGE_SIZE);
 
@@ -3327,7 +3435,7 @@ pmap_pageidlezero(paddr_t pa)
 	pmap_pte_set(zpte, pmap_pa2pte(pa) | pteflags);
 	pmap_pte_flush();
-	pmap_update_pg(zerova);		/* flush TLB */
+	pmap_update_pg_current(zerova);	/* flush TLB */
 
 	rv = sse2_idlezero_page((void *)zerova);
 
@@ -3378,8 +3486,8 @@ pmap_copy_page(paddr_t srcpa, paddr_t ds
 	pmap_pte_set(srcpte, pmap_pa2pte(srcpa) | pteflags);
 	pmap_pte_set(dstpte, pmap_pa2pte(dstpa) | pteflags | PG_M);
 	pmap_pte_flush();
-	pmap_update_pg(srcva);
-	pmap_update_pg(dstva);
+	pmap_update_pg_current(srcva);
+	pmap_update_pg_current(dstva);
 
 	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
 
@@ -3418,7 +3526,7 @@ pmap_map_ptp(struct vm_page *ptp)
 
 	pmap_pte_set(ptppte, pmap_pa2pte(VM_PAGE_TO_PHYS(ptp)) | pteflags);
 	pmap_pte_flush();
-	pmap_update_pg(ptpva);
+	pmap_update_pg_current(ptpva);
 
 	return (pt_entry_t *)ptpva;
 #endif
@@ -4484,7 +4592,7 @@ pmap_get_physpage(void)
 		pmap_pte_set(early_zero_pte,
 		    pmap_pa2pte(pa) | PG_V | PG_RW | pmap_pg_nx);
 		pmap_pte_flush();
-		pmap_update_pg((vaddr_t)early_zerop);
+		pmap_update_pg_current((vaddr_t)early_zerop);
 		memset(early_zerop, 0, PAGE_SIZE);
 #if defined(DIAGNOSTIC) || defined(XEN)
 		pmap_pte_set(early_zero_pte, 0);
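The hunk that follows adds the new flush primitives. One subtlety worth
spelling out: INVPCID type 0 (individual address) only invalidates
non-global entries for one given PCID, so a kernel VA that may be cached
under many PCIDs cannot be flushed selectively; this is why
pmap_update_pg_shootdown() below falls back to a full INVPCID_CTXGLOB flush
for the kernel pmap. The hypothetical per-PCID alternative would look like
the sketch below and would still miss global entries (illustration only,
deliberately not what the patch does):

	/*
	 * O(pmap_num_pcid) selective flush of one kernel VA. Not used by
	 * the patch; note that type-0 INVPCID skips global translations,
	 * so this is not equivalent when PG_G mappings are in play.
	 */
	static void
	flush_kva_all_pcids(vaddr_t va, tlb_asid_t num_pcid)
	{
		for (tlb_asid_t ctx = 0; ctx < num_pcid; ctx++)
			invpcid(INVPCID_ADDR, ctx, va);
	}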
*/ + } + + invpcid(INVPCID_ADDR, ctx, va); +#ifdef SVS + if (svs_enabled) { + /* Flush also the user address space part */ + invpcid(INVPCID_ADDR, + ctx | PCID_SVS_USERSPACE, va); + } +#endif + } + return; + } +#endif /* __HAVE_PCID */ + + invlpg(va); +} + +/* Flush TLB for all contexts */ +static void +tlbflush_allctx(void) +{ +#ifdef __HAVE_PCID + if (x86_use_pcid) + invpcid(INVPCID_ALLCTX, 0, 0); + else +#endif + tlbflush(); /* PCID not in use */ +} + +/* + * Flush TLB for current address space. If SVS is used, flush the both the + * kernel and user address space parts. + */ +static void +tlbflush_current(struct cpu_info *ci) +{ + tlbflush(); /* flush TLB for current address space */ + +#ifdef SVS + if (svs_enabled) + invpcid(INVPCID_CTX, ci->ci_current_uctx, 0); +#endif +} + +#ifdef __HAVE_PCID +/* + * Allocate a hardware context to the given pmap. + */ +static void +ctx_alloc(struct pmap *pm, struct cpu_info *ci) +{ + KASSERT(pm != pmap_kernel()); + KASSERT(ci->ci_pmap_ctxgen > 0); + + if (pmap_ctx(ci, pm) != 0 + && pmap_ctxgen(ci, pm) == ci->ci_pmap_ctxgen) { + /* Already has context */ + goto out; + } + + tlb_asid_t ctx = ci->ci_pmap_next_ctx++; + + /* + * if we have run out of contexts, remove all user entries from + * the TLB and start over with context 1 again. + */ + + if (__predict_false(ctx == pmap_num_pcid)) { + /* Flush TLB for all PCIDs and all VAs, excluding globals */ + invpcid(INVPCID_ALLCTX, 0, 0); + + pmap_pcid_wrap_evcnt.ev_count++; + ci->ci_pmap_ctxgen++; + +#ifdef DIAGNOSTIC + if (__predict_false(ci->ci_pmap_ctxgen == 0)) { + /* + * The generation number has wrapped. We could + * handle this scenario by traversing all of + * the pmaps, and invalidating the generation + * number on those which are not currently + * in use by this processor. + * + * However... considering that we're using + * an unsigned 32-bit integer for generation + * numbers, with 12-bit PCID we wrap in around + * 32686 years (16343 with SVS) if 1000 new processes + * run on the processor every second. + * + * So, we don't bother. + */ + panic("%s: too much uptime", __func__); + } +#endif + + ctx = 1; + ci->ci_pmap_next_ctx = 2; + } + pmap_ctx(ci, pm) = ctx; + pmap_ctxgen(ci, pm) = ci->ci_pmap_ctxgen; + +out: + ci->ci_current_kctx = pmap_ctx(ci, pm); +} +#endif /* __HAVE_PCID */ Index: arch/x86/x86/svs.c =================================================================== RCS file: /cvsroot/src/sys/arch/x86/x86/svs.c,v retrieving revision 1.17 diff -u -p -r1.17 svs.c --- arch/x86/x86/svs.c 30 Mar 2018 19:58:05 -0000 1.17 +++ arch/x86/x86/svs.c 31 Mar 2018 20:08:18 -0000 @@ -520,6 +520,20 @@ svs_lwp_switch(struct lwp *oldlwp, struc */ pte = ci->ci_svs_rsp0_pte; *pte = L1_BASE[pl1_i(va)]; + +#ifdef __HAVE_PCID + if (x86_use_pcid) { + /* + * Actually need to flush the TLB for user address space, + * this won't be flushed implicitely when PCID is in use. + * XXX why TLB contains inconsistent data at this moment? + * XXX the mapping should remain the same all the time + * XXX It's also not enough to flush just address mapping + * XXX for va + */ + invpcid(INVPCID_CTX, ci->ci_current_uctx, 0); + } +#endif } static inline pt_entry_t @@ -534,7 +548,7 @@ svs_pte_atomic_read(struct pmap *pmap, s /* * We may come here with the pmap unlocked. So read its PTEs atomically. 
Index: arch/x86/x86/svs.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/svs.c,v
retrieving revision 1.17
diff -u -p -r1.17 svs.c
--- arch/x86/x86/svs.c	30 Mar 2018 19:58:05 -0000	1.17
+++ arch/x86/x86/svs.c	31 Mar 2018 20:08:18 -0000
@@ -520,6 +520,20 @@ svs_lwp_switch(struct lwp *oldlwp, struc
 	 */
 	pte = ci->ci_svs_rsp0_pte;
 	*pte = L1_BASE[pl1_i(va)];
+
+#ifdef __HAVE_PCID
+	if (x86_use_pcid) {
+		/*
+		 * We actually need to flush the TLB for the user address
+		 * space here; it is not flushed implicitly when PCID is
+		 * in use.
+		 * XXX why does the TLB contain stale data at this point?
+		 * XXX the mapping should remain the same all the time
+		 * XXX it's also not enough to flush just the mapping for va
+		 */
+		invpcid(INVPCID_CTX, ci->ci_current_uctx, 0);
+	}
+#endif
 }
 
 static inline pt_entry_t
@@ -534,7 +548,7 @@ svs_pte_atomic_read(struct pmap *pmap, s
 /*
 * We may come here with the pmap unlocked. So read its PTEs atomically. If
 * a remote CPU is updating them at the same time, it's not a problem: the
- * remote CPU will call svs_pmap_sync afterwards, and our updirpa will be
+ * remote CPU will call svs_pmap_sync afterwards, and our kpdirpa will be
 * synchronized properly.
 */
 void
@@ -550,6 +564,15 @@ svs_pdir_switch(struct pmap *pmap)
 
 	ci->ci_svs_kpdirpa = pmap_pdirpa(pmap, 0);
 
+#ifdef __HAVE_PCID
+	if (x86_use_pcid) {
+		ci->ci_svs_kpdirpa &= ~PCID_MASK;
+		ci->ci_svs_kpdirpa |= ci->ci_current_kctx | PCID_NO_TLB_FLUSH;
+		ci->ci_svs_updirpa &= ~PCID_MASK;
+		ci->ci_svs_updirpa |= ci->ci_current_uctx | PCID_NO_TLB_FLUSH;
+	}
+#endif
+
 	/* Update the info in the UTLS page */
 	utls = (struct svs_utls *)ci->ci_svs_utls;
 	utls->kpdirpa = ci->ci_svs_kpdirpa;
Index: arch/x86/x86/x86_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/x86_machdep.c,v
retrieving revision 1.110
diff -u -p -r1.110 x86_machdep.c
--- arch/x86/x86/x86_machdep.c	31 Mar 2018 08:43:52 -0000	1.110
+++ arch/x86/x86/x86_machdep.c	31 Mar 2018 20:08:18 -0000
@@ -1311,6 +1311,10 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysc
 	    CPU_FPU_PRESENT);
 	const_sysctl(clog, "osfxsr", CTLTYPE_INT, i386_use_fxsave,
 	    CPU_OSFXSR);
+#ifdef __HAVE_PCID
+	const_sysctl(clog, "pcid", CTLTYPE_INT, x86_use_pcid,
+	    CPU_PCID);
+#endif /* __HAVE_PCID */
 	const_sysctl(clog, "sse", CTLTYPE_INT, i386_has_sse,
 	    CPU_SSE);
 	const_sysctl(clog, "sse2", CTLTYPE_INT, i386_has_sse2,
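The x86_tlb.c change below grows each shootdown slot from a bare VA to a
(VA, pmap) pair, which halves the slot count and shrinks tp_count to a
uint8_t so the packet still fits its cache-line budget. The LP64 size claim
can be checked standalone (field types approximated; layout assumes the
usual LP64 ABI):

	#include <assert.h>
	#include <stdint.h>

	struct tlb_ctx { uintptr_t tc_va; void *tc_pmap; };

	struct tlb_packet {
		struct tlb_ctx	tp_ctx[7];	/* 112 bytes */
		uint16_t	tp_pad;
		uint8_t		tp_count;
		uint8_t		tp_userpmap;
		uint16_t	tp_pte;
		void *		tp_cpumask;	/* aligned up to offset 120 */
	};

	/* 112 + 2 + 1 + 1 + 2, padded to 120, plus an 8-byte pointer. */
	static_assert(sizeof(struct tlb_packet) == 128, "whole struct: 128 bytes");

	int
	main(void)
	{
		return 0;
	}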
Index: arch/x86/x86/x86_tlb.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/x86_tlb.c,v
retrieving revision 1.1
diff -u -p -r1.1 x86_tlb.c
--- arch/x86/x86/x86_tlb.c	22 Jan 2018 19:37:45 -0000	1.1
+++ arch/x86/x86/x86_tlb.c	31 Mar 2018 20:08:18 -0000
@@ -61,16 +61,21 @@ __KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v
 /*
 * TLB shootdown structures.
 */
+struct pmap_tlb_ctx_t {
+	uintptr_t	tc_va;
+	struct pmap *	tc_pmap;
+};
 
 typedef struct {
 #ifdef _LP64
-	uintptr_t		tp_va[14];	/* whole struct: 128 bytes */
+	struct pmap_tlb_ctx_t	tp_ctx[7];	/* whole struct: 128 bytes */
+	uint16_t		_tp_pad;
 #else
-	uintptr_t		tp_va[13];	/* whole struct: 64 bytes */
+	struct pmap_tlb_ctx_t	tp_ctx[7];	/* whole struct: 64 bytes */
 #endif
-	uint16_t		tp_count;
+	uint8_t			tp_count;	/* <= TP_MAXVA or 0xff */
+	uint8_t			tp_userpmap;
 	uint16_t		tp_pte;
-	int			tp_userpmap;
 	kcpuset_t *		tp_cpumask;
 } pmap_tlb_packet_t;
@@ -198,20 +203,32 @@ pmap_tlb_invalidate(const pmap_tlb_packe
 	int i;
 
 	/* Find out what we need to invalidate. */
-	if (tp->tp_count == (uint16_t)-1) {
+	if (tp->tp_count == (uint8_t)-1) {
 		u_int egen = uvm_emap_gen_return();
 		if (tp->tp_pte & PG_G) {
-			/* Invalidating user and kernel TLB entries. */
-			tlbflushg();
+			/* Invalidating all user and kernel TLB entries. */
+#ifdef __HAVE_PCID
+			if (x86_use_pcid)
+				invpcid(INVPCID_CTXGLOB, 0, 0);
+			else
+#endif /* __HAVE_PCID */
+				tlbflushg();
 		} else {
-			/* Invalidating user TLB entries only. */
-			tlbflush();
+			/* Invalidating all user TLB entries only. */
+#ifdef __HAVE_PCID
+			if (x86_use_pcid)
+				invpcid(INVPCID_ALLCTX, 0, 0);
+			else
+#endif /* __HAVE_PCID */
+				tlbflush();	/* PCID not in use */
 		}
 		uvm_emap_update(egen);
 	} else {
 		/* Invalidating a single page or a range of pages. */
 		for (i = tp->tp_count - 1; i >= 0; i--) {
-			pmap_update_pg(tp->tp_va[i]);
+			KASSERT(tp->tp_ctx[i].tc_pmap != NULL);
+			pmap_update_pg_shootdown(tp->tp_ctx[i].tc_va,
+			    tp->tp_ctx[i].tc_pmap);
 		}
 	}
 }
@@ -249,19 +266,22 @@ pmap_tlb_shootdown(struct pmap *pm, vadd
 
 	/* Whole address flush will be needed if PG_G is set. */
 	CTASSERT(PG_G == (uint16_t)PG_G);
+	CTASSERT(TP_MAXVA < __arraycount(tp->tp_ctx));
 	tp->tp_pte |= (uint16_t)pte;
 
-	if (tp->tp_count == (uint16_t)-1) {
+	if (tp->tp_count == (uint8_t)-1) {
 		/*
 		 * Already flushing everything.
 		 */
 	} else if (tp->tp_count < TP_MAXVA && va != (vaddr_t)-1LL) {
 		/* Flush a single page. */
-		tp->tp_va[tp->tp_count++] = va;
+		tp->tp_ctx[tp->tp_count].tc_va = va;
+		tp->tp_ctx[tp->tp_count].tc_pmap = pm;
+		tp->tp_count++;
 		KASSERT(tp->tp_count > 0);
 	} else {
 		/* Flush everything. */
-		tp->tp_count = (uint16_t)-1;
+		tp->tp_count = (uint8_t)-1;
 	}
 
 	if (pm != pmap_kernel()) {
@@ -284,10 +304,10 @@ static inline void
 pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
 {
 
-	if (tp->tp_count != (uint16_t)-1) {
+	if (tp->tp_count != (uint8_t)-1) {
 		/* Invalidating a single page or a range of pages. */
 		for (int i = tp->tp_count - 1; i >= 0; i--) {
-			xen_mcast_invlpg(tp->tp_va[i], target);
+			xen_mcast_invlpg(tp->tp_ctx[i].tc_va, target);
 		}
 	} else {
 		xen_mcast_tlbflush(target);
@@ -420,7 +440,7 @@ pmap_tlb_shootnow(void)
 	 * Clear out our local buffer.
 	 */
 #ifdef TLBSTATS
-	if (tp->tp_count != (uint16_t)-1) {
+	if (tp->tp_count != (uint8_t)-1) {
 		atomic_add_64(&tlbstat_single_issue.ev_count,
 		    tp->tp_count);
 	}
 #endif
Index: arch/amd64/include/types.h
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/include/types.h,v
retrieving revision 1.55
diff -u -p -r1.55 types.h
--- arch/amd64/include/types.h	16 Mar 2018 12:19:35 -0000	1.55
+++ arch/amd64/include/types.h	31 Mar 2018 20:08:18 -0000
@@ -106,12 +106,17 @@ typedef unsigned char __cpu_simple_lock
 #define	__HAVE_DIRECT_MAP 1
 #define	__HAVE_MM_MD_DIRECT_MAPPED_IO
 #define	__HAVE_MM_MD_DIRECT_MAPPED_PHYS
+#define	__HAVE_PCID
 #if !defined(NO_PCI_MSI_MSIX)
 #define	__HAVE_PCI_MSI_MSIX
 #endif
 #endif
 #endif
 
+#ifdef __HAVE_PCID
+typedef unsigned short	tlb_asid_t;
+#endif /* __HAVE_PCID */
+
 #else	/* !__x86_64__ */
 
 #include <i386/types.h>
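Finally, the SVS pairing implied by PCID_SVS_USERSPACE and the halved
pmap_num_pcid: every pmap's kernel PCID n (1 <= n < 0x800) is paired with
user PCID n | 0x800, so the two halves of the 12-bit PCID space can never
collide. A trivial standalone illustration (hosted C):

	#include <stdint.h>
	#include <stdio.h>

	#define PCID_NUM		0x1000u	/* 12-bit PCID space */
	#define PCID_SVS_USERSPACE	0x0800u	/* top bit marks the user variant */

	int
	main(void)
	{
		/* With SVS, allocation stops at PCID_NUM / 2 = 0x800. */
		for (uint16_t kctx = 1; kctx < PCID_NUM / 2; kctx += 0x3ff) {
			uint16_t uctx = kctx | PCID_SVS_USERSPACE;

			printf("kernel PCID %#6x <-> user PCID %#6x\n",
			    kctx, uctx);
		}
		return 0;
	}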