git.dujemihanovic.xyz Git - linux.git/commitdiff
openrisc: Move FPU state out of pt_regs
author Stafford Horne <shorne@gmail.com>
Sat, 30 Mar 2024 14:56:39 +0000 (14:56 +0000)
committer Stafford Horne <shorne@gmail.com>
Mon, 15 Apr 2024 14:20:39 +0000 (15:20 +0100)
My original, naive, FPU support patch had the FPCSR register stored
during both the *mode switch* and *context switch*.  This is wasteful.

Also, the original patches did not save the FPU state when handling
signals during the system call fast path.

We fix this by moving the FPCSR state to thread_struct in task_struct.
We also introduce new helper functions save_fpu and restore_fpu which
can be used to sync the FPU with thread_struct.  These functions are now
called when needed:

 - Setting up and restoring sigcontext when handling signals
 - Before and after __switch_to during context switches
 - When handling FPU exceptions
 - When reading and writing FPU register sets

In the future we can further optimize this by doing lazy FPU save and
restore.  For example, FPU sync is not needed when switching to and from
kernel threads (x86 does this).  FPU save and restore does not need to
be done two times if we have both rescheduling and signal work to do.
However, since OpenRISC FPU state is a single register, I leave these
optimizations for future consideration.

Signed-off-by: Stafford Horne <shorne@gmail.com>
arch/openrisc/include/asm/fpu.h [new file with mode: 0644]
arch/openrisc/include/asm/processor.h
arch/openrisc/include/asm/ptrace.h
arch/openrisc/kernel/entry.S
arch/openrisc/kernel/process.c
arch/openrisc/kernel/ptrace.c
arch/openrisc/kernel/signal.c
arch/openrisc/kernel/traps.c

diff --git a/arch/openrisc/include/asm/fpu.h b/arch/openrisc/include/asm/fpu.h
new file mode 100644 (file)
index 0000000..57bc44d
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_OPENRISC_FPU_H
+#define __ASM_OPENRISC_FPU_H
+
+struct task_struct;
+
+#ifdef CONFIG_FPU
+static inline void save_fpu(struct task_struct *task)
+{
+       task->thread.fpcsr = mfspr(SPR_FPCSR);
+}
+
+static inline void restore_fpu(struct task_struct *task)
+{
+       mtspr(SPR_FPCSR, task->thread.fpcsr);
+}
+#else
+#define save_fpu(tsk)                  do { } while (0)
+#define restore_fpu(tsk)               do { } while (0)
+#endif
+
+#endif /* __ASM_OPENRISC_FPU_H */
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index 3b736e74e6eddca9cce901b3fd7d9df1ebc05869..e05d1b59e24e1afca4f46fb96ad13377e58b94fd 100644 (file)
@@ -44,6 +44,7 @@
 struct task_struct;
 
 struct thread_struct {
+       long fpcsr;             /* Floating point control status register. */
 };
 
 /*
diff --git a/arch/openrisc/include/asm/ptrace.h b/arch/openrisc/include/asm/ptrace.h
index 375147ff71fcdb338088d3f388b7a88613e47672..1da3e66292e21ac885b3de50fb24b5cf1182bd62 100644 (file)
@@ -59,7 +59,7 @@ struct pt_regs {
         * -1 for all other exceptions.
         */
        long  orig_gpr11;       /* For restarting system calls */
-       long fpcsr;             /* Floating point control status register. */
+       long dummy;             /* Cheap alignment fix */
        long dummy2;            /* Cheap alignment fix */
 };
 
@@ -115,6 +115,5 @@ static inline long regs_return_value(struct pt_regs *regs)
 #define PT_GPR31      124
 #define PT_PC        128
 #define PT_ORIG_GPR11 132
-#define PT_FPCSR      136
 
 #endif /* __ASM_OPENRISC_PTRACE_H */
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index c9f48e750b72b5ce0ffe1922754dac85963bacd5..440711d7bf40e3538dcaf9a64c2451079253b9d7 100644 (file)
        l.mtspr r0,r3,SPR_EPCR_BASE                             ;\
        l.lwz   r3,PT_SR(r1)                                    ;\
        l.mtspr r0,r3,SPR_ESR_BASE                              ;\
-       l.lwz   r3,PT_FPCSR(r1)                                 ;\
-       l.mtspr r0,r3,SPR_FPCSR                                 ;\
        l.lwz   r2,PT_GPR2(r1)                                  ;\
        l.lwz   r3,PT_GPR3(r1)                                  ;\
        l.lwz   r4,PT_GPR4(r1)                                  ;\
@@ -177,8 +175,6 @@ handler:                                                    ;\
        /* r30 already save */                                  ;\
        l.sw    PT_GPR31(r1),r31                                        ;\
        TRACE_IRQS_OFF_ENTRY                                            ;\
-       l.mfspr r30,r0,SPR_FPCSR                                ;\
-       l.sw    PT_FPCSR(r1),r30                                ;\
        /* Store -1 in orig_gpr11 for non-syscall exceptions */ ;\
        l.addi  r30,r0,-1                                       ;\
        l.sw    PT_ORIG_GPR11(r1),r30
@@ -219,8 +215,6 @@ handler:                                                    ;\
        /* Store -1 in orig_gpr11 for non-syscall exceptions */ ;\
        l.addi  r30,r0,-1                                       ;\
        l.sw    PT_ORIG_GPR11(r1),r30                           ;\
-       l.mfspr r30,r0,SPR_FPCSR                                ;\
-       l.sw    PT_FPCSR(r1),r30                                ;\
        l.addi  r3,r1,0                                         ;\
        /* r4 is exception EA */                                ;\
        l.addi  r5,r0,vector                                    ;\
@@ -852,6 +846,7 @@ _syscall_badsys:
 
 EXCEPTION_ENTRY(_fpe_trap_handler)
        CLEAR_LWA_FLAG(r3)
+
        /* r4: EA of fault (set by EXCEPTION_HANDLE) */
        l.jal   do_fpe_trap
         l.addi  r3,r1,0 /* pt_regs */
@@ -1100,10 +1095,6 @@ ENTRY(_switch)
        l.sw    PT_GPR28(r1),r28
        l.sw    PT_GPR30(r1),r30
 
-       /* Store the old FPU state to new pt_regs */
-       l.mfspr r29,r0,SPR_FPCSR
-       l.sw    PT_FPCSR(r1),r29
-
        l.addi  r11,r10,0                       /* Save old 'current' to 'last' return value*/
 
        /* We use thread_info->ksp for storing the address of the above
@@ -1126,10 +1117,6 @@ ENTRY(_switch)
        l.lwz   r29,PT_SP(r1)
        l.sw    TI_KSP(r10),r29
 
-       /* Restore the old value of FPCSR */
-       l.lwz   r29,PT_FPCSR(r1)
-       l.mtspr r0,r29,SPR_FPCSR
-
        /* ...and restore the registers, except r11 because the return value
         * has already been set above.
         */
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 3c27d1c727189d7a78f6eae5ab47b74576e78009..eef99fee2110cb26fcd95fadcd44254208039600 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/reboot.h>
 
 #include <linux/uaccess.h>
+#include <asm/fpu.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/spr_defs.h>
@@ -244,6 +245,8 @@ struct task_struct *__switch_to(struct task_struct *old,
 
        local_irq_save(flags);
 
+       save_fpu(current);
+
        /* current_set is an array of saved current pointers
         * (one for each cpu). we need them at user->kernel transition,
         * while we save them at kernel->user transition
@@ -256,6 +259,8 @@ struct task_struct *__switch_to(struct task_struct *old,
        current_thread_info_set[smp_processor_id()] = new_ti;
        last = (_switch(old_ti, new_ti))->task;
 
+       restore_fpu(current);
+
        local_irq_restore(flags);
 
        return last;
diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c
index cf410193095fcad02bd11620d51502f4c02707b5..5091b18eab4c36ea4c6d40636d8e4af4e6e2f905 100644 (file)
@@ -98,9 +98,7 @@ static int fpregs_get(struct task_struct *target,
                       const struct user_regset *regset,
                       struct membuf to)
 {
-       const struct pt_regs *regs = task_pt_regs(target);
-
-       return membuf_store(&to, regs->fpcsr);
+       return membuf_store(&to, target->thread.fpcsr);
 }
 
 static int fpregs_set(struct task_struct *target,
@@ -108,13 +106,9 @@ static int fpregs_set(struct task_struct *target,
                       unsigned int pos, unsigned int count,
                       const void *kbuf, const void __user *ubuf)
 {
-       struct pt_regs *regs = task_pt_regs(target);
-       int ret;
-
        /* FPCSR */
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &regs->fpcsr, 0, 4);
-       return ret;
+       return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.fpcsr, 0, 4);
 }
 #endif
 
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index e2f21a5d8ad9a9b4b5bcc3fa7a555f7841c4d2e7..c7ab42e2cb7a4f3dc21742d7171fb02be12057f6 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/stddef.h>
 #include <linux/resume_user_mode.h>
 
+#include <asm/fpu.h>
 #include <asm/processor.h>
 #include <asm/syscall.h>
 #include <asm/ucontext.h>
@@ -39,6 +40,37 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs);
 asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
                               int syscall);
 
+#ifdef CONFIG_FPU
+static long restore_fp_state(struct sigcontext __user *sc)
+{
+       long err;
+
+       err = __copy_from_user(&current->thread.fpcsr, &sc->fpcsr, sizeof(unsigned long));
+       if (unlikely(err))
+               return err;
+
+       /* Restore the FPU state */
+       restore_fpu(current);
+
+       return 0;
+}
+
+static long save_fp_state(struct sigcontext __user *sc)
+{
+       long err;
+
+       /* Sync the user FPU state so we can copy to sigcontext */
+       save_fpu(current);
+
+       err = __copy_to_user(&sc->fpcsr, &current->thread.fpcsr, sizeof(unsigned long));
+
+       return err;
+}
+#else
+#define save_fp_state(sc) (0)
+#define restore_fp_state(sc) (0)
+#endif
+
 static int restore_sigcontext(struct pt_regs *regs,
                              struct sigcontext __user *sc)
 {
@@ -55,7 +87,7 @@ static int restore_sigcontext(struct pt_regs *regs,
        err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long));
        err |= __copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long));
        err |= __copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long));
-       err |= __copy_from_user(&regs->fpcsr, &sc->fpcsr, sizeof(unsigned long));
+       err |= restore_fp_state(sc);
 
        /* make sure the SM-bit is cleared so user-mode cannot fool us */
        regs->sr &= ~SPR_SR_SM;
@@ -118,7 +150,7 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
        err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long));
        err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long));
        err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long));
-       err |= __copy_to_user(&sc->fpcsr, &regs->fpcsr, sizeof(unsigned long));
+       err |= save_fp_state(sc);
 
        return err;
 }
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 57e0d674eb04c732827c34265565a26a1ebda2e8..c195be9cc9fccb454ea0a158b82d91bd60d6d33a 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/bug.h>
+#include <asm/fpu.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/unwinder.h>
@@ -84,9 +85,8 @@ void show_registers(struct pt_regs *regs)
                in_kernel = 0;
 
        pr_info("CPU #: %d\n"
-               "   PC: %08lx    SR: %08lx    SP: %08lx FPCSR: %08lx\n",
-               smp_processor_id(), regs->pc, regs->sr, regs->sp,
-               regs->fpcsr);
+               "   PC: %08lx    SR: %08lx    SP: %08lx\n",
+               smp_processor_id(), regs->pc, regs->sr, regs->sp);
        pr_info("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n",
                0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]);
        pr_info("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n",
@@ -183,7 +183,10 @@ asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address)
        if (user_mode(regs)) {
                int code = FPE_FLTUNK;
 #ifdef CONFIG_FPU
-               unsigned long fpcsr = regs->fpcsr;
+               unsigned long fpcsr;
+
+               save_fpu(current);
+               fpcsr = current->thread.fpcsr;
 
                if (fpcsr & SPR_FPCSR_IVF)
                        code = FPE_FLTINV;
@@ -197,7 +200,8 @@ asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address)
                        code = FPE_FLTRES;
 
                /* Clear all flags */
-               regs->fpcsr &= ~SPR_FPCSR_ALLF;
+               current->thread.fpcsr &= ~SPR_FPCSR_ALLF;
+               restore_fpu(current);
 #endif
                force_sig_fault(SIGFPE, code, (void __user *)regs->pc);
        } else {