From cfca4b5abe0cc13f9d9f45f760efd8260e31200f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 12 May 2020 22:18:08 -0700 Subject: [PATCH] ARC: entry: use gp to cache task pointer (vs. r25) The motivation is eventual ABI considerations for ARCv3 but even without it this change us worthwhile as diffstat reduces 100 net lines r25 is a callee saved register, normally not saved by entry code in pt_regs. However because of its usage in CONFIG_ARC_CURR_IN_REG it needs to be. This in turn requires a whole bunch of special casing when we need to access r25. Then there is distinction between user mode r25 vs. kernel mode r25 - hence distinct SAVE_CALLEE_SAVED_{USER,KERNEL} Instead use gp which is a scratch register and thus saved already in entry code. This cleans things up significantly and much nocer on eyes: - SAVE_CALLEE_SAVED_{USER,KERNEL} are now exactly same - no special user_r25 slot in pt_reggs Note that typical global asm registers are callee-saved (r25), but gp is not callee-saved thus needs additional -ffixed- toggle Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 6 +- arch/arc/Makefile | 6 +- arch/arc/include/asm/current.h | 2 +- arch/arc/include/asm/entry-arcv2.h | 17 ++-- arch/arc/include/asm/entry-compact.h | 35 +++------ arch/arc/include/asm/entry.h | 111 ++++++++------------------- arch/arc/include/asm/ptrace.h | 6 +- arch/arc/kernel/asm-offsets.c | 2 - arch/arc/kernel/ctx_sw.c | 13 +--- arch/arc/kernel/ctx_sw_asm.S | 2 +- arch/arc/kernel/entry.S | 3 +- arch/arc/kernel/process.c | 11 --- arch/arc/kernel/ptrace.c | 2 - 13 files changed, 60 insertions(+), 156 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 47b4acc7d0c9..c92bacc1ff4c 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -492,11 +492,11 @@ config ARC_KVADDR_SIZE kernel-user gutter) config ARC_CURR_IN_REG - bool "Dedicate Register r25 for current_task pointer" + bool "cache current task pointer in gp" default y help - This reserved Register R25 to point to Current Task in - kernel mode. This saves memory access for each such access + This reserves gp register to point to Current Task in + kernel mode eliding memory access for each access config ARC_EMUL_UNALIGNED diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 329400a1c355..2390dd042e36 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -28,14 +28,14 @@ cflags-y += $(tune-mcpu-def-y) endif endif - ifdef CONFIG_ARC_CURR_IN_REG # For a global register definition, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using -# any kernel headers, and missing the r25 global register +# any kernel headers, and missing the global register # Can't do unconditionally because of recursive include issues # due to LINUXINCLUDE += -include $(srctree)/arch/arc/include/asm/current.h +cflags-y += -ffixed-gp endif cflags-y += -fsection-anchors @@ -67,7 +67,7 @@ cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables $(cfi) # small data is default for elf32 tool-chain. If not usable, disable it # This also allows repurposing GP as scratch reg to gcc reg allocator disable_small_data := y -cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp +cflags-$(disable_small_data) += -mno-sdata cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h index 9b9bdd3e6538..06be89f6f2f0 100644 --- a/arch/arc/include/asm/current.h +++ b/arch/arc/include/asm/current.h @@ -13,7 +13,7 @@ #ifdef CONFIG_ARC_CURR_IN_REG -register struct task_struct *curr_arc asm("r25"); +register struct task_struct *curr_arc asm("gp"); #define current (curr_arc) #else diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 0ff4c0610561..858742feab71 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -18,7 +18,6 @@ * | orig_r0 | * | event/ECR | * | bta | - * | user_r25 | * | gp | * | fp | * | sp | @@ -56,7 +55,7 @@ ; hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE ; 4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI ; - ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair + ; (B) Manually saved some regs: r12,r30, sp,fp,gp, ACCL pair #ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE ; carve pt_regs on stack (case #3), PC/STAT32 already on stack @@ -157,17 +156,17 @@ st r10, [sp, PT_sp] ; SP (pt_regs->sp) -#ifdef CONFIG_ARC_CURR_IN_REG - st r25, [sp, PT_user_r25] - GET_CURR_TASK_ON_CPU r25 -#endif - #ifdef CONFIG_ARC_HAS_ACCL_REGS ST2 r58, r59, PT_r58 #endif /* clobbers r10, r11 registers pair */ DSP_SAVE_REGFILE_IRQ + +#ifdef CONFIG_ARC_CURR_IN_REG + GET_CURR_TASK_ON_CPU gp +#endif + .endm /*------------------------------------------------------------------------*/ @@ -188,10 +187,6 @@ sr r10, [AUX_USER_SP] 1: -#ifdef CONFIG_ARC_CURR_IN_REG - ld r25, [sp, PT_user_r25] -#endif - /* clobbers r10, r11 registers pair */ DSP_RESTORE_REGFILE_IRQ diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 67ff06e15cea..e3383e1cb040 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -151,14 +151,6 @@ /* ARC700 doesn't provide auto-stack switching */ SWITCH_TO_KERNEL_STK -#ifdef CONFIG_ARC_CURR_IN_REG - /* Treat r25 as scratch reg (save on stack) and load with "current" */ - PUSH r25 - GET_CURR_TASK_ON_CPU r25 -#else - sub sp, sp, 4 -#endif - st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */ sub sp, sp, 4 /* skip pt_regs->sp, already saved above */ @@ -179,6 +171,11 @@ lr r10, [ecr] st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */ + +#ifdef CONFIG_ARC_CURR_IN_REG + /* gp already saved on stack: now load with "current" */ + GET_CURR_TASK_ON_CPU gp +#endif .endm /*-------------------------------------------------------------- @@ -208,11 +205,8 @@ POP gp RESTORE_R12_TO_R0 -#ifdef CONFIG_ARC_CURR_IN_REG - ld r25, [sp, 12] -#endif ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ + /* orig_r0, ECR skipped automatically */ .endm /* Dummy ECR values for Interrupts */ @@ -229,13 +223,6 @@ SWITCH_TO_KERNEL_STK -#ifdef CONFIG_ARC_CURR_IN_REG - /* Treat r25 as scratch reg (save on stack) and load with "current" */ - PUSH r25 - GET_CURR_TASK_ON_CPU r25 -#else - sub sp, sp, 4 -#endif PUSH 0x003\LVL\()abcd /* Dummy ECR */ sub sp, sp, 8 /* skip orig_r0 (not needed) @@ -255,6 +242,10 @@ PUSHAX lp_start PUSHAX bta_l\LVL\() +#ifdef CONFIG_ARC_CURR_IN_REG + /* gp already saved on stack: now load with "current" */ + GET_CURR_TASK_ON_CPU gp +#endif .endm /*-------------------------------------------------------------- @@ -282,11 +273,7 @@ POP gp RESTORE_R12_TO_R0 -#ifdef CONFIG_ARC_CURR_IN_REG - ld r25, [sp, 12] -#endif - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ + ld sp, [sp] /* restore original sp; orig_r0, ECR skipped implicitly */ .endm /* Get thread_info of "current" tsk */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 2980bc9b7653..49c2e090cb5c 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -91,7 +91,7 @@ * Helpers to save/restore callee-saved regs: * used by several macros below *-------------------------------------------------------------*/ -.macro SAVE_R13_TO_R24 +.macro SAVE_R13_TO_R25 PUSH r13 PUSH r14 PUSH r15 @@ -104,9 +104,11 @@ PUSH r22 PUSH r23 PUSH r24 + PUSH r25 .endm -.macro RESTORE_R24_TO_R13 +.macro RESTORE_R25_TO_R13 + POP r25 POP r24 POP r23 POP r22 @@ -121,81 +123,31 @@ POP r13 .endm -/*-------------------------------------------------------------- - * Collect User Mode callee regs as struct callee_regs - needed by - * fork/do_signal/unaligned-access-emulation. - * (By default only scratch regs are saved on entry to kernel) - * - * Special handling for r25 if used for caching Task Pointer. - * It would have been saved in task->thread.user_r25 already, but to keep - * the interface same it is copied into regular r25 placeholder in - * struct callee_regs. - *-------------------------------------------------------------*/ +/* + * save user mode callee regs as struct callee_regs + * - needed by fork/do_signal/unaligned-access-emulation. + */ .macro SAVE_CALLEE_SAVED_USER - - mov r12, sp ; save SP as ref to pt_regs - SAVE_R13_TO_R24 - -#ifdef CONFIG_ARC_CURR_IN_REG - ; Retrieve orig r25 and save it with rest of callee_regs - ld r12, [r12, PT_user_r25] - PUSH r12 -#else - PUSH r25 -#endif - + SAVE_R13_TO_R25 .endm -/*-------------------------------------------------------------- - * Save kernel Mode callee regs at the time of Contect Switch. - * - * Special handling for r25 if used for caching Task Pointer. - * Kernel simply skips saving it since it will be loaded with - * incoming task pointer anyways - *-------------------------------------------------------------*/ -.macro SAVE_CALLEE_SAVED_KERNEL - - SAVE_R13_TO_R24 - -#ifdef CONFIG_ARC_CURR_IN_REG - sub sp, sp, 4 -#else - PUSH r25 -#endif -.endm - -/*-------------------------------------------------------------- - * Opposite of SAVE_CALLEE_SAVED_KERNEL - *-------------------------------------------------------------*/ -.macro RESTORE_CALLEE_SAVED_KERNEL - -#ifdef CONFIG_ARC_CURR_IN_REG - add sp, sp, 4 /* skip usual r25 placeholder */ -#else - POP r25 -#endif - RESTORE_R24_TO_R13 -.endm - -/*-------------------------------------------------------------- - * Opposite of SAVE_CALLEE_SAVED_USER - * - * ptrace tracer or unaligned-access fixup might have changed a user mode - * callee reg which is saved back to usual r25 storage location - *-------------------------------------------------------------*/ +/* + * restore user mode callee regs as struct callee_regs + * - could have been changed by ptrace tracer or unaligned-access fixup + */ .macro RESTORE_CALLEE_SAVED_USER + RESTORE_R25_TO_R13 +.endm -#ifdef CONFIG_ARC_CURR_IN_REG - POP r12 -#else - POP r25 -#endif - RESTORE_R24_TO_R13 +/* + * save/restore kernel mode callee regs at the time of context switch + */ +.macro SAVE_CALLEE_SAVED_KERNEL + SAVE_R13_TO_R25 +.endm - ; SP is back to start of pt_regs -#ifdef CONFIG_ARC_CURR_IN_REG - st r12, [sp, PT_user_r25] -#endif +.macro RESTORE_CALLEE_SAVED_KERNEL + RESTORE_R25_TO_R13 .endm /*-------------------------------------------------------------- @@ -231,10 +183,10 @@ #ifdef CONFIG_SMP -/*------------------------------------------------- +/* * Retrieve the current running task on this CPU - * 1. Determine curr CPU id. - * 2. Use it to index into _current_task[ ] + * - loads it from backing _current_task[] (and can't use the + * caching reg for current task */ .macro GET_CURR_TASK_ON_CPU reg GET_CPU_ID \reg @@ -256,7 +208,7 @@ add2 \tmp, @_current_task, \tmp st \tsk, [\tmp] #ifdef CONFIG_ARC_CURR_IN_REG - mov r25, \tsk + mov gp, \tsk #endif .endm @@ -271,21 +223,20 @@ .macro SET_CURR_TASK_ON_CPU tsk, tmp st \tsk, [@_current_task] #ifdef CONFIG_ARC_CURR_IN_REG - mov r25, \tsk + mov gp, \tsk #endif .endm #endif /* SMP / UNI */ -/* ------------------------------------------------------------------ +/* * Get the ptr to some field of Current Task at @off in task struct - * -Uses r25 for Current task ptr if that is enabled + * - Uses current task cached in reg if enabled */ - #ifdef CONFIG_ARC_CURR_IN_REG .macro GET_CURR_TASK_FIELD_PTR off, reg - add \reg, r25, \off + add \reg, gp, \off .endm #else diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index cf90fcd2a628..e9798f46cdc4 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -52,11 +52,9 @@ struct pt_regs { }; unsigned long event; }; - - unsigned long user_r25; }; -#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25) +#define MAX_REG_OFFSET offsetof(struct pt_regs, event) #else @@ -79,8 +77,6 @@ struct pt_regs { unsigned long bta; /* bta_l1, bta_l2, erbta */ - unsigned long user_r25; - unsigned long r26; /* gp */ unsigned long fp; unsigned long sp; /* user/kernel sp depending on where we came from */ diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c index 0e884036ab74..37324fd9a72f 100644 --- a/arch/arc/kernel/asm-offsets.c +++ b/arch/arc/kernel/asm-offsets.c @@ -63,8 +63,6 @@ int main(void) DEFINE(PT_blink, offsetof(struct pt_regs, blink)); DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end)); DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count)); - DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25)); - DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs)); DEFINE(SZ_PT_REGS, sizeof(struct pt_regs)); diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index bf16f777a0bc..40d89440b7e4 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -38,11 +38,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) "st.a r22, [sp, -4] \n\t" "st.a r23, [sp, -4] \n\t" "st.a r24, [sp, -4] \n\t" -#ifndef CONFIG_ARC_CURR_IN_REG "st.a r25, [sp, -4] \n\t" -#else - "sub sp, sp, 4 \n\t" /* usual r25 placeholder */ -#endif /* set ksp of outgoing task in tsk->thread.ksp */ #if KSP_WORD_OFF <= 255 @@ -58,7 +54,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) /* * setup _current_task with incoming tsk. - * optionally, set r25 to that as well + * optionally, set caching reg to that as well * For SMP extra work to get to &_current_task[cpu] * (open coded SET_CURR_TASK_ON_CPU) */ @@ -72,19 +68,14 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) "st %2, [r24] \n\t" #endif #ifdef CONFIG_ARC_CURR_IN_REG - "mov r25, %2 \n\t" + "mov gp, %2 \n\t" #endif /* get ksp of incoming task from tsk->thread.ksp */ "ld.as sp, [%2, %1] \n\t" /* start loading it's CALLEE reg file */ - -#ifndef CONFIG_ARC_CURR_IN_REG "ld.ab r25, [sp, 4] \n\t" -#else - "add sp, sp, 4 \n\t" -#endif "ld.ab r24, [sp, 4] \n\t" "ld.ab r23, [sp, 4] \n\t" "ld.ab r22, [sp, 4] \n\t" diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index 02c461484761..95cba6265e2b 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -49,7 +49,7 @@ __switch_to: SET_CURR_TASK_ON_CPU r1, r3 /* reload SP with kernel mode stack pointer in task->thread.ksp */ - ld.as sp, [r1, (TASK_THREAD + THREAD_KSP)/4] + ld.as sp, [r1, KSP_WORD_OFF] /* restore the registers */ RESTORE_CALLEE_SAVED_KERNEL diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 54e91df678dd..cd26e0fa5044 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -210,7 +210,6 @@ trap_with_param: ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File - ; NOTE: clobbers r12 SAVE_CALLEE_SAVED_USER ; save location of saved Callee Regs @ thread_struct->pc @@ -318,7 +317,7 @@ resume_user_mode_begin: ; tracer might call PEEKUSR(CALLEE reg) ; ; NOTE: SP will grow up by size of CALLEE Reg-File - SAVE_CALLEE_SAVED_USER ; clobbers r12 + SAVE_CALLEE_SAVED_USER ; save location of saved Callee Regs @ thread_struct->callee GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 980b71da2f61..96f591508142 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -162,7 +162,6 @@ asmlinkage void ret_from_fork(void); * | SP | * | orig_r0 | * | event/ECR | - * | user_r25 | * ------------------ <===== END of PAGE */ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) @@ -243,16 +242,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ c_callee->r25 = task_thread_info(p)->thr_ptr; -#ifdef CONFIG_ARC_CURR_IN_REG - /* - * setup usermode thread pointer #2: - * however for this special use of r25 in kernel, __switch_to() sets - * r25 for kernel needs and only in the final return path is usermode - * r25 setup, from pt_regs->user_r25. So set that up as well - */ - c_regs->user_r25 = c_callee->r25; -#endif - return 0; } diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 2abdcd9b09e8..8226df3163fe 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -47,7 +47,6 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_NAME(sp), REG_OFFSET_NAME(orig_r0), REG_OFFSET_NAME(event), - REG_OFFSET_NAME(user_r25), REG_OFFSET_END, }; @@ -57,7 +56,6 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_NAME(orig_r0), REG_OFFSET_NAME(event), REG_OFFSET_NAME(bta), - REG_OFFSET_NAME(user_r25), REG_OFFSET_NAME(r26), REG_OFFSET_NAME(fp), REG_OFFSET_NAME(sp),