- A fix for fanotify_mark() missing the conversion of x86_32 native
syscalls which take 64-bit arguments to the compat handlers due to former having a general compat handler. (Brian Gerst) - Add a forgotten pmd page destructor call to pud_free_pmd_page() where a pmd page is freed. (Dan Williams) - Make IN/OUT insns with an u8 immediate port operand handling for SEV-ES guests more precise by using only the single port byte and not the whole s32 value of the insn decoder. (Peter Gonda) - Correct a straddling end range check before returning the proper MTRR type, when the end address is the same as top of memory. (Ying-Tsun Huang) - Change PQR_ASSOC MSR update scheme when moving a task to a resctrl resource group to avoid significant performance overhead with some resctrl workloads. (Fenghua Yu) - Avoid the actual task move overhead when the task is already in the resource group. (Fenghua Yu) -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl/61xYACgkQEsHwGGHe VUrcvQ//dAWAteCC/BXVHpgcWrBOgPrkwv7aAo70bIO50fUj4pHPYbfhOJU1ey7j 5o4FrqdsOVhGfZjQzvT/juLsr9mQHsfszxKpDTLyK3wVtUtIODYXzgiXRc/qfZDO ozXCVUsUSKJgrIcKTBQbmugK36iZZk+ER+qzUaqd0aq8mocdtSSO8b14uaRJw3MR vumqmEmEEcyM9XK0UgTLPcf6Uhu+Mlg3YSNkV5Qhu0yiCTJaqeEySsytUcRsnnF/ z8AkxZP03Q65o3aoRoSGZihHNKTkNucbavYp70LkcqopoHlC+XERvya9ANRibLPi /+s9GQUm4QPg7XRHLB8dXFZ9RY3YGUeE60BUxVZa4vI3pwciPQD5tbvUF3F/jEN0 PYLy/zVlAkDfI6Z8wTl8DNmd8nd/rE0F4p5zayjpQUWsjjfZDrh+GzBl/YsMuYRp G8dk3tEUc8KREBEccv/YzuVcE0AhX4t1tkn3l2Le5v+4PbwRWBm2uNOiRfn4OM31 iB4E4yCHBnBhTyBA0TkWuHV1TJX6Tb2+0g+D49ZoMGFVoBd8NL6f+dBr0psjX/U+ RsZucit0FcJG2VhJNXEPD+rwNZ6XPfDmIU9GNTAmXUuoKR/kqT8D/NWYkqmKh/Vw +F2EIgOZVhQVOvLKWRut+4qmQRStm6B3UBJimEDySUJPT72O+dU= =2/Eq -----END PGP SIGNATURE----- Merge tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fixes from Borislav Petkov: "As expected, fixes started trickling in after the holidays so here is the accumulated pile of x86 fixes for 5.11: - A fix for fanotify_mark() missing the conversion of x86_32 native syscalls which 
take 64-bit arguments to the compat handlers due to the former having a general compat handler. (Brian Gerst) - Add a forgotten pmd page destructor call to pud_free_pmd_page() where a pmd page is freed. (Dan Williams) - Make IN/OUT insns with a u8 immediate port operand handling for SEV-ES guests more precise by using only the single port byte and not the whole s32 value of the insn decoder. (Peter Gonda) - Correct a straddling end range check before returning the proper MTRR type, when the end address is the same as top of memory. (Ying-Tsun Huang) - Change PQR_ASSOC MSR update scheme when moving a task to a resctrl resource group to avoid significant performance overhead with some resctrl workloads. (Fenghua Yu) - Avoid the actual task move overhead when the task is already in the resource group. (Fenghua Yu)" * tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/resctrl: Don't move a task to the same resource group x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR x86/mtrr: Correct the range check before performing MTRR type lookups x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling x86/mm: Fix leak of pmd ptlock fanotify: Fix sys_fanotify_mark() on native x86-32
This commit is contained in:
commit
a440e4d761
8 changed files with 94 additions and 83 deletions
|
@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID
|
||||||
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
||||||
bool
|
bool
|
||||||
|
|
||||||
|
config ARCH_SPLIT_ARG64
|
||||||
|
bool
|
||||||
|
help
|
||||||
|
If a 32-bit architecture requires 64-bit arguments to be split into
|
||||||
|
pairs of 32-bit arguments, select this option.
|
||||||
|
|
||||||
source "kernel/gcov/Kconfig"
|
source "kernel/gcov/Kconfig"
|
||||||
|
|
||||||
source "scripts/gcc-plugins/Kconfig"
|
source "scripts/gcc-plugins/Kconfig"
|
||||||
|
|
|
@ -19,6 +19,7 @@ config X86_32
|
||||||
select KMAP_LOCAL
|
select KMAP_LOCAL
|
||||||
select MODULES_USE_ELF_REL
|
select MODULES_USE_ELF_REL
|
||||||
select OLD_SIGACTION
|
select OLD_SIGACTION
|
||||||
|
select ARCH_SPLIT_ARG64
|
||||||
|
|
||||||
config X86_64
|
config X86_64
|
||||||
def_bool y
|
def_bool y
|
||||||
|
|
|
@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
|
||||||
*repeat = 0;
|
*repeat = 0;
|
||||||
*uniform = 1;
|
*uniform = 1;
|
||||||
|
|
||||||
/* Make end inclusive instead of exclusive */
|
|
||||||
end--;
|
|
||||||
|
|
||||||
prev_match = MTRR_TYPE_INVALID;
|
prev_match = MTRR_TYPE_INVALID;
|
||||||
for (i = 0; i < num_var_ranges; ++i) {
|
for (i = 0; i < num_var_ranges; ++i) {
|
||||||
unsigned short start_state, end_state, inclusive;
|
unsigned short start_state, end_state, inclusive;
|
||||||
|
@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
|
||||||
int repeat;
|
int repeat;
|
||||||
u64 partial_end;
|
u64 partial_end;
|
||||||
|
|
||||||
|
/* Make end inclusive instead of exclusive */
|
||||||
|
end--;
|
||||||
|
|
||||||
if (!mtrr_state_set)
|
if (!mtrr_state_set)
|
||||||
return MTRR_TYPE_INVALID;
|
return MTRR_TYPE_INVALID;
|
||||||
|
|
||||||
|
|
|
@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
|
||||||
kfree(rdtgrp);
|
kfree(rdtgrp);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct task_move_callback {
|
static void _update_task_closid_rmid(void *task)
|
||||||
struct callback_head work;
|
|
||||||
struct rdtgroup *rdtgrp;
|
|
||||||
};
|
|
||||||
|
|
||||||
static void move_myself(struct callback_head *head)
|
|
||||||
{
|
{
|
||||||
struct task_move_callback *callback;
|
|
||||||
struct rdtgroup *rdtgrp;
|
|
||||||
|
|
||||||
callback = container_of(head, struct task_move_callback, work);
|
|
||||||
rdtgrp = callback->rdtgrp;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If resource group was deleted before this task work callback
|
* If the task is still current on this CPU, update PQR_ASSOC MSR.
|
||||||
* was invoked, then assign the task to root group and free the
|
* Otherwise, the MSR is updated when the task is scheduled in.
|
||||||
* resource group.
|
|
||||||
*/
|
*/
|
||||||
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
|
if (task == current)
|
||||||
(rdtgrp->flags & RDT_DELETED)) {
|
resctrl_sched_in();
|
||||||
current->closid = 0;
|
}
|
||||||
current->rmid = 0;
|
|
||||||
rdtgroup_remove(rdtgrp);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unlikely(current->flags & PF_EXITING))
|
static void update_task_closid_rmid(struct task_struct *t)
|
||||||
goto out;
|
{
|
||||||
|
if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
|
||||||
preempt_disable();
|
smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
|
||||||
/* update PQR_ASSOC MSR to make resource group go into effect */
|
else
|
||||||
resctrl_sched_in();
|
_update_task_closid_rmid(t);
|
||||||
preempt_enable();
|
|
||||||
|
|
||||||
out:
|
|
||||||
kfree(callback);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __rdtgroup_move_task(struct task_struct *tsk,
|
static int __rdtgroup_move_task(struct task_struct *tsk,
|
||||||
struct rdtgroup *rdtgrp)
|
struct rdtgroup *rdtgrp)
|
||||||
{
|
{
|
||||||
struct task_move_callback *callback;
|
/* If the task is already in rdtgrp, no need to move the task. */
|
||||||
int ret;
|
if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
|
||||||
|
tsk->rmid == rdtgrp->mon.rmid) ||
|
||||||
callback = kzalloc(sizeof(*callback), GFP_KERNEL);
|
(rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
|
||||||
if (!callback)
|
tsk->closid == rdtgrp->mon.parent->closid))
|
||||||
return -ENOMEM;
|
return 0;
|
||||||
callback->work.func = move_myself;
|
|
||||||
callback->rdtgrp = rdtgrp;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Take a refcount, so rdtgrp cannot be freed before the
|
* Set the task's closid/rmid before the PQR_ASSOC MSR can be
|
||||||
* callback has been invoked.
|
* updated by them.
|
||||||
|
*
|
||||||
|
* For ctrl_mon groups, move both closid and rmid.
|
||||||
|
* For monitor groups, can move the tasks only from
|
||||||
|
* their parent CTRL group.
|
||||||
*/
|
*/
|
||||||
atomic_inc(&rdtgrp->waitcount);
|
|
||||||
ret = task_work_add(tsk, &callback->work, TWA_RESUME);
|
if (rdtgrp->type == RDTCTRL_GROUP) {
|
||||||
if (ret) {
|
tsk->closid = rdtgrp->closid;
|
||||||
/*
|
tsk->rmid = rdtgrp->mon.rmid;
|
||||||
* Task is exiting. Drop the refcount and free the callback.
|
} else if (rdtgrp->type == RDTMON_GROUP) {
|
||||||
* No need to check the refcount as the group cannot be
|
if (rdtgrp->mon.parent->closid == tsk->closid) {
|
||||||
* deleted before the write function unlocks rdtgroup_mutex.
|
|
||||||
*/
|
|
||||||
atomic_dec(&rdtgrp->waitcount);
|
|
||||||
kfree(callback);
|
|
||||||
rdt_last_cmd_puts("Task exited\n");
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* For ctrl_mon groups move both closid and rmid.
|
|
||||||
* For monitor groups, can move the tasks only from
|
|
||||||
* their parent CTRL group.
|
|
||||||
*/
|
|
||||||
if (rdtgrp->type == RDTCTRL_GROUP) {
|
|
||||||
tsk->closid = rdtgrp->closid;
|
|
||||||
tsk->rmid = rdtgrp->mon.rmid;
|
tsk->rmid = rdtgrp->mon.rmid;
|
||||||
} else if (rdtgrp->type == RDTMON_GROUP) {
|
} else {
|
||||||
if (rdtgrp->mon.parent->closid == tsk->closid) {
|
rdt_last_cmd_puts("Can't move task to different control group\n");
|
||||||
tsk->rmid = rdtgrp->mon.rmid;
|
return -EINVAL;
|
||||||
} else {
|
|
||||||
rdt_last_cmd_puts("Can't move task to different control group\n");
|
|
||||||
ret = -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
|
||||||
|
/*
|
||||||
|
* Ensure the task's closid and rmid are written before determining if
|
||||||
|
* the task is current that will decide if it will be interrupted.
|
||||||
|
*/
|
||||||
|
barrier();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* By now, the task's closid and rmid are set. If the task is current
|
||||||
|
* on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
|
||||||
|
* group go into effect. If the task is not current, the MSR will be
|
||||||
|
* updated when the task is scheduled in.
|
||||||
|
*/
|
||||||
|
update_task_closid_rmid(tsk);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
|
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
|
||||||
|
|
|
@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
|
||||||
case 0xe4:
|
case 0xe4:
|
||||||
case 0xe5:
|
case 0xe5:
|
||||||
*exitinfo |= IOIO_TYPE_IN;
|
*exitinfo |= IOIO_TYPE_IN;
|
||||||
*exitinfo |= (u64)insn->immediate.value << 16;
|
*exitinfo |= (u8)insn->immediate.value << 16;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* OUT immediate opcodes */
|
/* OUT immediate opcodes */
|
||||||
case 0xe6:
|
case 0xe6:
|
||||||
case 0xe7:
|
case 0xe7:
|
||||||
*exitinfo |= IOIO_TYPE_OUT;
|
*exitinfo |= IOIO_TYPE_OUT;
|
||||||
*exitinfo |= (u64)insn->immediate.value << 16;
|
*exitinfo |= (u8)insn->immediate.value << 16;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* IN register opcodes */
|
/* IN register opcodes */
|
||||||
|
|
|
@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
free_page((unsigned long)pmd_sv);
|
free_page((unsigned long)pmd_sv);
|
||||||
|
|
||||||
|
pgtable_pmd_page_dtor(virt_to_page(pmd));
|
||||||
free_page((unsigned long)pmd);
|
free_page((unsigned long)pmd);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -1285,26 +1285,23 @@ fput_and_out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef CONFIG_ARCH_SPLIT_ARG64
|
||||||
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
|
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
|
||||||
__u64, mask, int, dfd,
|
__u64, mask, int, dfd,
|
||||||
const char __user *, pathname)
|
const char __user *, pathname)
|
||||||
{
|
{
|
||||||
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
|
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_COMPAT
|
#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
|
||||||
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
|
SYSCALL32_DEFINE6(fanotify_mark,
|
||||||
int, fanotify_fd, unsigned int, flags,
|
int, fanotify_fd, unsigned int, flags,
|
||||||
__u32, mask0, __u32, mask1, int, dfd,
|
SC_ARG64(mask), int, dfd,
|
||||||
const char __user *, pathname)
|
const char __user *, pathname)
|
||||||
{
|
{
|
||||||
return do_fanotify_mark(fanotify_fd, flags,
|
return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
|
||||||
#ifdef __BIG_ENDIAN
|
dfd, pathname);
|
||||||
((__u64)mask0 << 32) | mask1,
|
|
||||||
#else
|
|
||||||
((__u64)mask1 << 32) | mask0,
|
|
||||||
#endif
|
|
||||||
dfd, pathname);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
|
||||||
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
|
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
|
||||||
#endif /* __SYSCALL_DEFINEx */
|
#endif /* __SYSCALL_DEFINEx */
|
||||||
|
|
||||||
|
/* For split 64-bit arguments on 32-bit architectures */
|
||||||
|
#ifdef __LITTLE_ENDIAN
|
||||||
|
#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
|
||||||
|
#else
|
||||||
|
#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
|
||||||
|
#endif
|
||||||
|
#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
|
||||||
|
|
||||||
|
#ifdef CONFIG_COMPAT
|
||||||
|
#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
|
||||||
|
#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
|
||||||
|
#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
|
||||||
|
#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
|
||||||
|
#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
|
||||||
|
#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
|
||||||
|
#else
|
||||||
|
#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
|
||||||
|
#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
|
||||||
|
#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
|
||||||
|
#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
|
||||||
|
#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
|
||||||
|
#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called before coming back to user-mode. Returning to user-mode with an
|
* Called before coming back to user-mode. Returning to user-mode with an
|
||||||
* address limit different than USER_DS can allow to overwrite kernel memory.
|
* address limit different than USER_DS can allow to overwrite kernel memory.
|
||||||
|
|
Loading…
Add table
Reference in a new issue