x86: inline the 'rep movs' in user copies for the FSRM case
This does the same thing for the user copies as commit 0db7058e8e
("x86/clear_user: Make it faster") did for clear_user(). In other
words, it inlines the "rep movs" case when X86_FEATURE_FSRM is set,
avoiding the function call entirely.
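
For illustration only (not part of the patch, and rep_movsb_copy() is a made-up name): a minimal user-space sketch of what the FSRM fast path boils down to once the call is inlined. 'rep movsb' takes the destination in %rdi, the source in %rsi and the count in %rcx, and decrements %rcx as it copies, so whatever is left in %rcx afterwards is the number of uncopied bytes. The kernel version additionally brackets it with stac()/clac() and an exception-table entry for faults, which a plain user-space program has no equivalent for.

	#include <stdio.h>

	/* Hypothetical helper; assumes x86-64 and a GCC/Clang-style compiler. */
	static inline unsigned long rep_movsb_copy(void *to, const void *from,
						   unsigned long len)
	{
		/* count in %rcx, dest in %rdi, source in %rsi -- the 'rep movs' ABI */
		asm volatile("rep movsb"
			     : "+c" (len), "+D" (to), "+S" (from)
			     : : "memory");
		return len;	/* bytes left uncopied; 0 here, since nothing can fault */
	}

	int main(void)
	{
		char src[] = "fast short rep movs";
		char dst[sizeof(src)];

		unsigned long left = rep_movsb_copy(dst, src, sizeof(src));
		printf("copied \"%s\", %lu bytes uncopied\n", dst, left);
		return 0;
	}
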
In order to do that, it makes the calling convention for the out-of-line
case ("copy_user_generic_unrolled") match the 'rep movs' calling
convention, although it does also end up clobbering a number of
additional registers.
Also, to simplify code sharing in the low-level assembly with the
__copy_user_nocache() function (that uses the normal C calling
convention), we end up with a kind of mixed return value for the
low-level asm code: it will return the result in both %rcx (to work as
an alternative for the 'rep movs' case), _and_ in %rax (for the nocache
case).
We could avoid this by wrapping __copy_user_nocache() callers in an
inline asm, but since the cost is just an extra register copy, it's
probably not worth it.
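
For reference, this is the resulting copy_user_generic() from the uaccess_64.h hunk below, reproduced with a few editorial comments that are not in the patch itself:

	static __always_inline __must_check unsigned long
	copy_user_generic(void *to, const void *from, unsigned long len)
	{
		stac();
		asm volatile(
			"1:\n\t"
			/* FSRM machines get the bare string instruction inlined;
			 * everything else calls the out-of-line unrolled copy,
			 * which now uses the same rdi/rsi/rcx register contract. */
			ALTERNATIVE("rep movsb",
				    "call copy_user_generic_unrolled", ALT_NOT(X86_FEATURE_FSRM))
			"2:\n"
			_ASM_EXTABLE_UA(1b, 2b)
			/* count in, uncopied count out -- both in %rcx */
			: "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
			/* %rax/%rdx are clobbers because the shared low-level code
			 * mirrors the result into %rax and moves the count into
			 * %rdx for the __copy_user_nocache() code sharing. */
			: : "memory", "rax", "rdx", "r8", "r9", "r10", "r11");
		clac();
		return len;
	}
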
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 577e6a7fd5 (parent 3639a53558)
3 changed files with 31 additions and 48 deletions
arch/x86/include/asm/uaccess_64.h

@@ -18,29 +18,26 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
 
 static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+copy_user_generic(void *to, const void *from, unsigned long len)
 {
-	unsigned ret;
-
 	stac();
 	/*
 	 * If CPU has FSRM feature, use 'rep movs'.
 	 * Otherwise, use copy_user_generic_unrolled.
 	 */
-	alternative_call(copy_user_generic_unrolled,
-			 copy_user_fast_string,
-			 X86_FEATURE_FSRM,
-			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
-				     "=d" (len)),
-			 "1" (to), "2" (from), "3" (len)
-			 : "memory", "rcx", "r8", "r9", "r10", "r11");
+	asm volatile(
+		"1:\n\t"
+		ALTERNATIVE("rep movsb",
+			    "call copy_user_generic_unrolled", ALT_NOT(X86_FEATURE_FSRM))
+		"2:\n"
+		_ASM_EXTABLE_UA(1b, 2b)
+		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
+		: : "memory", "rax", "rdx", "r8", "r9", "r10", "r11");
 	clac();
-	return ret;
+	return len;
 }
 
 static __always_inline __must_check unsigned long
arch/x86/lib/copy_user_64.S

@@ -45,13 +45,29 @@
  * Input:
  * rdi destination
  * rsi source
- * rdx count
+ * rcx count
  *
  * Output:
- * eax uncopied bytes or 0 if successful.
+ * rcx uncopied bytes or 0 if successful.
+ *
+ * NOTE! The calling convention is very intentionally the same as
+ * for 'rep movs', so that we can rewrite the function call with
+ * just a plain 'rep movs' on machines that have FSRM.
+ *
+ * HOWEVER! This function ends up having a lot of the code common
+ * with __copy_user_nocache(), which is a normal C function, and
+ * has a similar calling convention, but gets the 'count' in %rdx,
+ * and returns the result in %rax.
+ *
+ * To share as much code as possible, we end up returning the
+ * result in *both* %rcx/%rax, and we also move the initial count
+ * into %rdx.
+ *
+ * We can clobber rdx/rsi/rdi and r8-r11
  */
 SYM_FUNC_START(copy_user_generic_unrolled)
-	cmpl $8,%edx
+	movl %ecx,%edx
+	cmpl $8,%ecx
 	jb .Lcopy_user_short_string_bytes
 	ALIGN_DESTINATION
 	movl %edx,%ecx
@@ -103,37 +119,6 @@ SYM_FUNC_START(copy_user_generic_unrolled)
 SYM_FUNC_END(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
-/*
- * Some CPUs support FSRM for Fast Short REP MOVS.
- *
- * Only 4GB of copy is supported. This shouldn't be a problem
- * because the kernel normally only writes from/to page sized chunks
- * even if user space passed a longer buffer.
- * And more would be dangerous because both Intel and AMD have
- * errata with rep movsq > 4GB. If someone feels the need to fix
- * this please consider this.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_FUNC_START(copy_user_fast_string)
-	movl %edx,%ecx
-1:	rep movsb
-	xorl %eax,%eax
-	RET
-
-12:	movl %ecx,%eax		/* ecx is zerorest also */
-	RET
-
-	_ASM_EXTABLE_CPY(1b, 12b)
-SYM_FUNC_END(copy_user_fast_string)
-EXPORT_SYMBOL(copy_user_fast_string)
-
 /*
  * Try to copy last bytes and clear the rest if needed.
  * Since protection fault in copy_from/to_user is not a normal situation,
@@ -160,6 +145,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 
 3:
 	movl %edx,%eax
+	movl %edx,%ecx
 	RET
 
 	_ASM_EXTABLE_CPY(1b, 2b)
@@ -203,6 +189,7 @@ SYM_CODE_START_LOCAL(copy_user_short_string)
 	decl %ecx
 	jnz 21b
 23:	xor %eax,%eax
+	xor %ecx,%ecx
 	RET
 
 40:	leal (%rdx,%rcx,8),%edx
tools/objtool/check.c

@@ -1286,7 +1286,6 @@ static const char *uaccess_safe_builtin[] = {
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	"clear_user_original",
 	"copy_user_generic_unrolled",
-	"copy_user_fast_string",
 	"__copy_user_nocache",
 	NULL
 };