diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf
index 6ba30f18a462..a41c1157a6c9 100644
--- a/sys/arch/amd64/amd64/genassym.cf
+++ b/sys/arch/amd64/amd64/genassym.cf
@@ -166,7 +166,6 @@
 define	L_MD_FLAGS		offsetof(struct lwp, l_md.md_flags)
 define	L_MD_ASTPENDING		offsetof(struct lwp, l_md.md_astpending)
 define	LW_SYSTEM		LW_SYSTEM
-define	LW_SYSTEM_FPU		LW_SYSTEM_FPU
 define	MDL_IRET		MDL_IRET
 define	MDL_COMPAT32		MDL_COMPAT32
 define	MDL_FPU_IN_CPU		MDL_FPU_IN_CPU
diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S
index 4a062cdb3b0e..371e2139daf3 100644
--- a/sys/arch/amd64/amd64/locore.S
+++ b/sys/arch/amd64/amd64/locore.S
@@ -1271,7 +1271,7 @@ ENTRY(cpu_switchto)
 
 	/* Don't bother with the rest if switching to a system process. */
 	testl	$LW_SYSTEM,L_FLAG(%r12)
-	jnz	.Lswitch_system
+	jnz	.Lswitch_return
 
 	/* Is this process using RAS (restartable atomic sequences)? */
 	movq	L_PROC(%r12),%rdi
@@ -1360,21 +1360,6 @@ ENTRY(cpu_switchto)
 	popq	%r12
 	popq	%rbx
 	ret
-
-.Lswitch_system:
-	/*
-	 * If it has LWP_SYSTEM_FPU set, meaning it's running in
-	 * kthread_fpu_enter/exit, we need to restore the FPU state
-	 * and enable FPU instructions with fpu_handle_deferred.
-	 *
-	 * No need to test MDL_FPU_IN_CPU via HANDLE_DEFERRED_FPU --
-	 * fpu_switch guarantees it is clear, so we can just call
-	 * fpu_handle_deferred unconditionally.
-	 */
-	testl	$LW_SYSTEM_FPU,L_FLAG(%r12)
-	jz	.Lswitch_return
-	callq	_C_LABEL(fpu_handle_deferred)
-	jmp	.Lswitch_return
 END(cpu_switchto)
 
 /*
diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf
index 271dff397eae..44167686a9a0 100644
--- a/sys/arch/i386/i386/genassym.cf
+++ b/sys/arch/i386/i386/genassym.cf
@@ -175,7 +175,6 @@
 define	L_MD_FLAGS		offsetof(struct lwp, l_md.md_flags)
 define	L_MD_ASTPENDING		offsetof(struct lwp, l_md.md_astpending)
 define	LW_SYSTEM		LW_SYSTEM
-define	LW_SYSTEM_FPU		LW_SYSTEM_FPU
 define	MDL_FPU_IN_CPU		MDL_FPU_IN_CPU
 
 define	P_FLAG			offsetof(struct proc, p_flag)
diff --git a/sys/arch/i386/i386/locore.S b/sys/arch/i386/i386/locore.S
index b3d01a0cc2a0..44b8ca17e374 100644
--- a/sys/arch/i386/i386/locore.S
+++ b/sys/arch/i386/i386/locore.S
@@ -1471,7 +1471,7 @@ ENTRY(cpu_switchto)
 
 	/* Don't bother with the rest if switching to a system process. */
 	testl	$LW_SYSTEM,L_FLAG(%edi)
-	jnz	.Lswitch_system
+	jnz	switch_return
 
 #ifndef XENPV
 	/* Restore thread-private %fs/%gs descriptors. */
@@ -1525,21 +1525,6 @@ switch_return:
 	popl	%ebx
 	ret
 
-.Lswitch_system:
-	/*
-	 * If it has LWP_SYSTEM_FPU set, meaning it's running in
-	 * kthread_fpu_enter/exit, we need to restore the FPU state
-	 * and enable FPU instructions with fpu_handle_deferred.
-	 *
-	 * No need to test MDL_FPU_IN_CPU via HANDLE_DEFERRED_FPU --
-	 * fpu_switch guarantees it is clear, so we can just call
-	 * fpu_handle_deferred unconditionally.
-	 */
-	testl	$LW_SYSTEM_FPU,L_FLAG(%edi)
-	jz	switch_return
-	call	_C_LABEL(fpu_handle_deferred)
-	jmp	switch_return
-
 .Lcopy_iobitmap:
	/* Copy I/O bitmap. */
 	incl	_C_LABEL(pmap_iobmp_evcnt)+EV_COUNT
diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c
index 041c7e305742..9effcac3aab5 100644
--- a/sys/arch/x86/x86/fpu.c
+++ b/sys/arch/x86/x86/fpu.c
@@ -101,33 +101,29 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.83 2023/02/25 18:28:57 riastradh Exp $");
 #include "opt_multiprocessor.h"
 
 #include 
-#include 
-
+#include 
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
+#include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
 #include 
+#include 
 #include 
+#include 
 #include 
-#include 
 #include 
-
+#include 
 
 #include 
 #include 
 
 #ifdef XENPV
-#define clts() HYPERVISOR_fpu_taskswitch(0)
-#define stts() HYPERVISOR_fpu_taskswitch(1)
+#define clts()		HYPERVISOR_fpu_taskswitch(0)
+#define stts()		HYPERVISOR_fpu_taskswitch(1)
 #endif
 
 void fpu_handle_deferred(void);
@@ -135,35 +131,13 @@ void fpu_switch(struct lwp *, struct lwp *);
 
 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
 
-/*
- * True if this a thread that is allowed to use the FPU -- either a
- * user thread, or a system thread with LW_SYSTEM_FPU enabled.
- */
-static inline bool
-lwp_can_haz_fpu(struct lwp *l)
-{
-
-	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) != LW_SYSTEM;
-}
-
-/*
- * True if this is a system thread with its own private FPU state.
- */
-static inline bool
-lwp_system_fpu_p(struct lwp *l)
-{
-
-	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
-	    (LW_SYSTEM|LW_SYSTEM_FPU);
-}
-
 static inline union savefpu *
 fpu_lwp_area(struct lwp *l)
 {
 	struct pcb *pcb = lwp_getpcb(l);
 	union savefpu *area = &pcb->pcb_savefpu;
 
-	KASSERT(lwp_can_haz_fpu(l));
+	KASSERT((l->l_flag & LW_SYSTEM) == 0);
 	if (l == curlwp) {
 		fpu_save();
 	}
@@ -181,9 +155,8 @@ fpu_save_lwp(struct lwp *l)
 
 	s = splvm();
 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
-		KASSERT(lwp_can_haz_fpu(l));
-		fpu_area_save(area, x86_xsave_features,
-		    !(l->l_proc->p_flag & PK_32));
+		KASSERT((l->l_flag & LW_SYSTEM) == 0);
+		fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
 	}
 	splx(s);
@@ -196,14 +169,12 @@
 void
 fpu_save(void)
 {
-
 	fpu_save_lwp(curlwp);
 }
 
 void
 fpuinit(struct cpu_info *ci)
 {
-
 	/*
 	 * This might not be strictly necessary since it will be initialized
 	 * for each process. However it does no harm.
@@ -284,7 +255,6 @@ fpu_errata_amd(void)
 void
 fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
 {
-
 	switch (x86_fpu_save) {
 	case FPU_SAVE_FSAVE:
 		fnsave(area);
@@ -306,7 +276,6 @@ fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
 void
 fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
 {
-
 	clts();
 
 	switch (x86_fpu_save) {
@@ -331,7 +300,6 @@ void
 fpu_handle_deferred(void)
 {
 	struct pcb *pcb = lwp_getpcb(curlwp);
-
 	fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features,
 	    !(curlwp->l_proc->p_flag & PK_32));
 }
@@ -346,7 +314,7 @@ fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
 	    cpu_index(ci), ci->ci_ilevel);
 
 	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
-		KASSERT(lwp_can_haz_fpu(oldlwp));
+		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
 		pcb = lwp_getpcb(oldlwp);
 		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
 		    !(oldlwp->l_proc->p_flag & PK_32));
@@ -362,11 +330,11 @@ fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
 	union savefpu *fpu_save;
 
 	/* Kernel threads have no FPU. */
-	if (__predict_false(!lwp_can_haz_fpu(l2))) {
+	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
 		return;
 	}
 	/* For init(8). */
-	if (__predict_false(!lwp_can_haz_fpu(l1))) {
+	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
 		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
 		return;
 	}
@@ -388,14 +356,7 @@ fpu_lwp_abandon(struct lwp *l)
 	splx(s);
 }
 
-/* ------------------------------------------------------------------------- */
-
-static const union savefpu safe_fpu __aligned(64) = {
-	.sv_xmm = {
-		.fx_mxcsr = __SAFE_MXCSR__,
-	},
-};
-static const union savefpu zero_fpu __aligned(64);
+/* -------------------------------------------------------------------------- */
 
 /*
  * fpu_kern_enter()
@@ -412,15 +373,15 @@ static const union savefpu zero_fpu __aligned(64);
 void
 fpu_kern_enter(void)
 {
+	static const union savefpu safe_fpu __aligned(64) = {
+		.sv_xmm = {
+			.fx_mxcsr = __SAFE_MXCSR__,
+		},
+	};
 	struct lwp *l = curlwp;
 	struct cpu_info *ci;
 	int s;
 
-	if (lwp_system_fpu_p(l) && !cpu_intr_p()) {
-		KASSERT(!cpu_softintr_p());
-		return;
-	}
-
 	s = splvm();
 
 	ci = curcpu();
@@ -437,8 +398,8 @@ fpu_kern_enter(void)
 	ci->ci_kfpu_spl = s;
 
 	/*
-	 * If we are in a softint and have a pinned lwp, the fpu state
-	 * is that of the pinned lwp, so save it there.
+	 * If we are in a softint and have a pinned lwp, the fpu state is that
+	 * of the pinned lwp, so save it there.
 	 */
 	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
 		l = l->l_switchto;
@@ -466,16 +427,10 @@ fpu_kern_enter(void)
 void
 fpu_kern_leave(void)
 {
-	struct cpu_info *ci;
+	static const union savefpu zero_fpu __aligned(64);
+	struct cpu_info *ci = curcpu();
 	int s;
 
-	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
-		KASSERT(!cpu_softintr_p());
-		return;
-	}
-
-	ci = curcpu();
-
 #if 0
 	/*
 	 * Can't assert this because if the caller holds a spin lock at
@@ -504,25 +459,7 @@ fpu_kern_leave(void)
 	splx(s);
 }
 
-void
-kthread_fpu_enter_md(void)
-{
-
-	/* Enable the FPU by clearing CR0_TS, and enter a safe FPU state. */
-	clts();
-	fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
-}
-
-void
-kthread_fpu_exit_md(void)
-{
-
-	/* Zero the FPU state and disable the FPU by setting CR0_TS. */
-	fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);
-	stts();
-}
-
-/* ------------------------------------------------------------------------- */
+/* -------------------------------------------------------------------------- */
 
 /*
  * The following table is used to ensure that the FPE_... value
@@ -713,7 +650,6 @@ fpudna(struct trapframe *frame)
 static inline void
 fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
 {
-
 	/*
 	 * Force a reload of the given xstate during the next XRSTOR.
 	 */
@@ -912,7 +848,6 @@ process_read_xstate(struct lwp *l, struct xstate *xstate)
 int
 process_verify_xstate(const struct xstate *xstate)
 {
-
 	/* xstate_bv must be a subset of RFBM */
 	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
 		return EINVAL;
@@ -942,10 +877,8 @@ process_write_xstate(struct lwp *l, const struct xstate *xstate)
 
 	/* Convert data into legacy FSAVE format. */
 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
-		if (xstate->xs_xstate_bv & XCR0_X87) {
-			process_xmm_to_s87(&xstate->xs_fxsave,
-			    &fpu_save->sv_87);
-		}
+		if (xstate->xs_xstate_bv & XCR0_X87)
+			process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
 		return 0;
 	}
 
@@ -978,16 +911,15 @@ process_write_xstate(struct lwp *l, const struct xstate *xstate)
 		/*
		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
 		 */
-		fpu_save->sv_xmm.fx_mxcsr_mask =
-		    xstate->xs_fxsave.fx_mxcsr_mask & x86_fpu_mxcsr_mask;
+		fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
+		    & x86_fpu_mxcsr_mask;
 		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
 		    fpu_save->sv_xmm.fx_mxcsr_mask;
 	}
 
 	if (xstate->xs_xstate_bv & XCR0_SSE) {
 		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
-		    xstate->xs_fxsave.fx_xmm,
-		    sizeof(xstate->xs_fxsave.fx_xmm));
+		    xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
 	}
 
 #define COPY_COMPONENT(xcr0_val, xsave_val, field)	\
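
For context on what this patch leaves behind: the per-kthread LW_SYSTEM_FPU /
kthread_fpu_enter path is removed, so the only way for kernel code to use the
FPU is the fpu_kern_enter()/fpu_kern_leave() critical section that the patch
keeps. A minimal sketch of that surviving pattern follows; it assumes kernel
context and the declarations in <x86/fpu.h>, and demo_fpu_op() is a
hypothetical caller used purely for illustration, not part of the patch:

#include <x86/fpu.h>	/* declares fpu_kern_enter(), fpu_kern_leave() */

/*
 * Bracket any in-kernel SIMD/FPU use with fpu_kern_enter() and
 * fpu_kern_leave().  fpu_kern_enter() raises the IPL (splvm) and
 * claims the FPU for this code path, so the section must stay short
 * and must not sleep; fpu_kern_leave() zeroes the state and gives
 * the FPU back.
 */
static void
demo_fpu_op(void)
{
	fpu_kern_enter();
	/* ... FPU/SSE instructions may be used here ... */
	fpu_kern_leave();
}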