ARM: 9195/1: entry: avoid explicit literal loads
ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into registers without having to rely on literal loads that go via the D-cache. For older cores, we now support a similar arrangement, based on PC-relative group relocations. This means we can elide most literal loads entirely from the entry path, by switching to the ldr_va macro to emit the appropriate sequence depending on the target architecture revision. While at it, switch to the bl_r macro for invoking the right PABT/DABT helpers instead of setting the LR register explicitly, which does not play well with cores that speculate across function returns. Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Reviewed-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
This commit is contained in:
parent
952f033163
commit
508074607c
4 changed files with 18 additions and 50 deletions
|
@ -666,12 +666,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
|
||||||
__adldst_l str, \src, \sym, \tmp, \cond
|
__adldst_l str, \src, \sym, \tmp, \cond
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro __ldst_va, op, reg, tmp, sym, cond
|
.macro __ldst_va, op, reg, tmp, sym, cond, offset
|
||||||
#if __LINUX_ARM_ARCH__ >= 7 || \
|
#if __LINUX_ARM_ARCH__ >= 7 || \
|
||||||
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
|
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
|
||||||
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
|
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
|
||||||
mov_l \tmp, \sym, \cond
|
mov_l \tmp, \sym, \cond
|
||||||
\op\cond \reg, [\tmp]
|
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
* Avoid a literal load, by emitting a sequence of ADD/LDR instructions
|
* Avoid a literal load, by emitting a sequence of ADD/LDR instructions
|
||||||
|
@ -683,20 +682,21 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
|
||||||
.reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
|
.reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
|
||||||
.reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
|
.reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
|
||||||
.reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
|
.reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
|
||||||
.L0_\@: sub\cond \tmp, pc, #8
|
.L0_\@: sub\cond \tmp, pc, #8 - \offset
|
||||||
.L1_\@: sub\cond \tmp, \tmp, #4
|
.L1_\@: sub\cond \tmp, \tmp, #4 - \offset
|
||||||
.L2_\@: \op\cond \reg, [\tmp, #0]
|
.L2_\@:
|
||||||
#endif
|
#endif
|
||||||
|
\op\cond \reg, [\tmp, #\offset]
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ldr_va - load a 32-bit word from the virtual address of \sym
|
* ldr_va - load a 32-bit word from the virtual address of \sym
|
||||||
*/
|
*/
|
||||||
.macro ldr_va, rd:req, sym:req, cond, tmp
|
.macro ldr_va, rd:req, sym:req, cond, tmp, offset=0
|
||||||
.ifnb \tmp
|
.ifnb \tmp
|
||||||
__ldst_va ldr, \rd, \tmp, \sym, \cond
|
__ldst_va ldr, \rd, \tmp, \sym, \cond, \offset
|
||||||
.else
|
.else
|
||||||
__ldst_va ldr, \rd, \rd, \sym, \cond
|
__ldst_va ldr, \rd, \rd, \sym, \cond, \offset
|
||||||
.endif
|
.endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -704,7 +704,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
|
||||||
* str_va - store a 32-bit word to the virtual address of \sym
|
* str_va - store a 32-bit word to the virtual address of \sym
|
||||||
*/
|
*/
|
||||||
.macro str_va, rn:req, sym:req, tmp:req, cond
|
.macro str_va, rn:req, sym:req, tmp:req, cond
|
||||||
__ldst_va str, \rn, \tmp, \sym, \cond
|
__ldst_va str, \rn, \tmp, \sym, \cond, 0
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -61,9 +61,8 @@
|
||||||
.macro pabt_helper
|
.macro pabt_helper
|
||||||
@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
|
@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
|
||||||
#ifdef MULTI_PABORT
|
#ifdef MULTI_PABORT
|
||||||
ldr ip, .LCprocfns
|
ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC
|
||||||
mov lr, pc
|
bl_r ip
|
||||||
ldr pc, [ip, #PROCESSOR_PABT_FUNC]
|
|
||||||
#else
|
#else
|
||||||
bl CPU_PABORT_HANDLER
|
bl CPU_PABORT_HANDLER
|
||||||
#endif
|
#endif
|
||||||
|
@ -82,9 +81,8 @@
|
||||||
@ the fault status register in r1. r9 must be preserved.
|
@ the fault status register in r1. r9 must be preserved.
|
||||||
@
|
@
|
||||||
#ifdef MULTI_DABORT
|
#ifdef MULTI_DABORT
|
||||||
ldr ip, .LCprocfns
|
ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC
|
||||||
mov lr, pc
|
bl_r ip
|
||||||
ldr pc, [ip, #PROCESSOR_DABT_FUNC]
|
|
||||||
#else
|
#else
|
||||||
bl CPU_DABORT_HANDLER
|
bl CPU_DABORT_HANDLER
|
||||||
#endif
|
#endif
|
||||||
|
@ -302,16 +300,6 @@ __fiq_svc:
|
||||||
UNWIND(.fnend )
|
UNWIND(.fnend )
|
||||||
ENDPROC(__fiq_svc)
|
ENDPROC(__fiq_svc)
|
||||||
|
|
||||||
.align 5
|
|
||||||
.LCcralign:
|
|
||||||
.word cr_alignment
|
|
||||||
#ifdef MULTI_DABORT
|
|
||||||
.LCprocfns:
|
|
||||||
.word processor
|
|
||||||
#endif
|
|
||||||
.LCfp:
|
|
||||||
.word fp_enter
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Abort mode handlers
|
* Abort mode handlers
|
||||||
*/
|
*/
|
||||||
|
@ -370,7 +358,7 @@ ENDPROC(__fiq_abt)
|
||||||
THUMB( stmia sp, {r0 - r12} )
|
THUMB( stmia sp, {r0 - r12} )
|
||||||
|
|
||||||
ATRAP( mrc p15, 0, r7, c1, c0, 0)
|
ATRAP( mrc p15, 0, r7, c1, c0, 0)
|
||||||
ATRAP( ldr r8, .LCcralign)
|
ATRAP( ldr_va r8, cr_alignment)
|
||||||
|
|
||||||
ldmia r0, {r3 - r5}
|
ldmia r0, {r3 - r5}
|
||||||
add r0, sp, #S_PC @ here for interlock avoidance
|
add r0, sp, #S_PC @ here for interlock avoidance
|
||||||
|
@ -379,8 +367,6 @@ ENDPROC(__fiq_abt)
|
||||||
str r3, [sp] @ save the "real" r0 copied
|
str r3, [sp] @ save the "real" r0 copied
|
||||||
@ from the exception stack
|
@ from the exception stack
|
||||||
|
|
||||||
ATRAP( ldr r8, [r8, #0])
|
|
||||||
|
|
||||||
@
|
@
|
||||||
@ We are now ready to fill in the remaining blanks on the stack:
|
@ We are now ready to fill in the remaining blanks on the stack:
|
||||||
@
|
@
|
||||||
|
@ -505,9 +491,7 @@ __und_usr_thumb:
|
||||||
*/
|
*/
|
||||||
#if __LINUX_ARM_ARCH__ < 7
|
#if __LINUX_ARM_ARCH__ < 7
|
||||||
/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
|
/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
|
||||||
#define NEED_CPU_ARCHITECTURE
|
ldr_va r5, __cpu_architecture @ same variable the removed .LCcpu_architecture literal referenced
|
||||||
ldr r5, .LCcpu_architecture
|
|
||||||
ldr r5, [r5]
|
|
||||||
cmp r5, #CPU_ARCH_ARMv7
|
cmp r5, #CPU_ARCH_ARMv7
|
||||||
blo __und_usr_fault_16 @ 16bit undefined instruction
|
blo __und_usr_fault_16 @ 16bit undefined instruction
|
||||||
/*
|
/*
|
||||||
|
@ -654,12 +638,6 @@ call_fpe:
|
||||||
ret.w lr @ CP#14 (Debug)
|
ret.w lr @ CP#14 (Debug)
|
||||||
ret.w lr @ CP#15 (Control)
|
ret.w lr @ CP#15 (Control)
|
||||||
|
|
||||||
#ifdef NEED_CPU_ARCHITECTURE
|
|
||||||
.align 2
|
|
||||||
.LCcpu_architecture:
|
|
||||||
.word __cpu_architecture
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_NEON
|
#ifdef CONFIG_NEON
|
||||||
.align 6
|
.align 6
|
||||||
|
|
||||||
|
@ -685,9 +663,8 @@ call_fpe:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
do_fpe:
|
do_fpe:
|
||||||
ldr r4, .LCfp
|
|
||||||
add r10, r10, #TI_FPSTATE @ r10 = workspace
|
add r10, r10, #TI_FPSTATE @ r10 = workspace
|
||||||
ldr pc, [r4] @ Call FP module USR entry point
|
ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The FP module is called with these registers set:
|
* The FP module is called with these registers set:
|
||||||
|
|
|
@ -198,7 +198,7 @@ ENTRY(vector_swi)
|
||||||
#endif
|
#endif
|
||||||
reload_current r10, ip
|
reload_current r10, ip
|
||||||
zero_fp
|
zero_fp
|
||||||
alignment_trap r10, ip, __cr_alignment
|
alignment_trap r10, ip, cr_alignment
|
||||||
asm_trace_hardirqs_on save=0
|
asm_trace_hardirqs_on save=0
|
||||||
enable_irq_notrace
|
enable_irq_notrace
|
||||||
ct_user_exit save=0
|
ct_user_exit save=0
|
||||||
|
@ -328,14 +328,6 @@ __sys_trace_return:
|
||||||
bl syscall_trace_exit
|
bl syscall_trace_exit
|
||||||
b ret_slow_syscall
|
b ret_slow_syscall
|
||||||
|
|
||||||
.align 5
|
|
||||||
#ifdef CONFIG_ALIGNMENT_TRAP
|
|
||||||
.type __cr_alignment, #object
|
|
||||||
__cr_alignment:
|
|
||||||
.word cr_alignment
|
|
||||||
#endif
|
|
||||||
.ltorg
|
|
||||||
|
|
||||||
.macro syscall_table_start, sym
|
.macro syscall_table_start, sym
|
||||||
.equ __sys_nr, 0
|
.equ __sys_nr, 0
|
||||||
.type \sym, #object
|
.type \sym, #object
|
||||||
|
|
|
@ -48,8 +48,7 @@
|
||||||
.macro alignment_trap, rtmp1, rtmp2, label
|
.macro alignment_trap, rtmp1, rtmp2, label
|
||||||
#ifdef CONFIG_ALIGNMENT_TRAP
|
#ifdef CONFIG_ALIGNMENT_TRAP
|
||||||
mrc p15, 0, \rtmp2, c1, c0, 0
|
mrc p15, 0, \rtmp2, c1, c0, 0
|
||||||
ldr \rtmp1, \label
|
ldr_va \rtmp1, \label
|
||||||
ldr \rtmp1, [\rtmp1]
|
|
||||||
teq \rtmp1, \rtmp2
|
teq \rtmp1, \rtmp2
|
||||||
mcrne p15, 0, \rtmp1, c1, c0, 0
|
mcrne p15, 0, \rtmp1, c1, c0, 0
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Reference in a new issue