mm: convert mm_lock_seq to a proper seqcount
Convert mm_lock_seq to be seqcount_t and change all mmap_write_lock variants to increment it, in line with the usual seqcount usage pattern. This lets us check whether the mmap_lock is write-locked by checking the mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be used when implementing mmap_lock speculation functions. As a result vm_lock_seq is also changed to be unsigned to match the type of mm_lock_seq.sequence. Link: https://lkml.kernel.org/r/20241122174416.1367052-2-surenb@google.com Suggested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Cc: Christian Brauner <brauner@kernel.org> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Hillf Danton <hdanton@sina.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Minchan Kim <minchan@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Peter Xu <peterx@redhat.com> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: Sourav Panda <souravpanda@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wei Yang <richard.weiyang@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
dba4761a3e
commit
e5e7fb278e
7 changed files with 74 additions and 57 deletions
|
@ -711,7 +711,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
|
||||||
* we don't rely on for anything - the mm_lock_seq read against which we
|
* we don't rely on for anything - the mm_lock_seq read against which we
|
||||||
* need ordering is below.
|
* need ordering is below.
|
||||||
*/
|
*/
|
||||||
if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
|
if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
|
if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
|
||||||
|
@ -728,7 +728,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
|
||||||
* after it has been unlocked.
|
* after it has been unlocked.
|
||||||
* This pairs with RELEASE semantics in vma_end_write_all().
|
* This pairs with RELEASE semantics in vma_end_write_all().
|
||||||
*/
|
*/
|
||||||
if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
|
if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
|
||||||
up_read(&vma->vm_lock->lock);
|
up_read(&vma->vm_lock->lock);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -743,7 +743,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
|
/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
|
||||||
static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
|
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
|
||||||
{
|
{
|
||||||
mmap_assert_write_locked(vma->vm_mm);
|
mmap_assert_write_locked(vma->vm_mm);
|
||||||
|
|
||||||
|
@ -751,7 +751,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
|
||||||
* current task is holding mmap_write_lock, both vma->vm_lock_seq and
|
* current task is holding mmap_write_lock, both vma->vm_lock_seq and
|
||||||
* mm->mm_lock_seq can't be concurrently modified.
|
* mm->mm_lock_seq can't be concurrently modified.
|
||||||
*/
|
*/
|
||||||
*mm_lock_seq = vma->vm_mm->mm_lock_seq;
|
*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
|
||||||
return (vma->vm_lock_seq == *mm_lock_seq);
|
return (vma->vm_lock_seq == *mm_lock_seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -762,7 +762,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
|
||||||
*/
|
*/
|
||||||
static inline void vma_start_write(struct vm_area_struct *vma)
|
static inline void vma_start_write(struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
int mm_lock_seq;
|
unsigned int mm_lock_seq;
|
||||||
|
|
||||||
if (__is_vma_write_locked(vma, &mm_lock_seq))
|
if (__is_vma_write_locked(vma, &mm_lock_seq))
|
||||||
return;
|
return;
|
||||||
|
@ -780,7 +780,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
|
||||||
|
|
||||||
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
|
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
int mm_lock_seq;
|
unsigned int mm_lock_seq;
|
||||||
|
|
||||||
VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
|
VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
|
||||||
}
|
}
|
||||||
|
|
|
@ -729,7 +729,7 @@ struct vm_area_struct {
|
||||||
* counter reuse can only lead to occasional unnecessary use of the
|
* counter reuse can only lead to occasional unnecessary use of the
|
||||||
* slowpath.
|
* slowpath.
|
||||||
*/
|
*/
|
||||||
int vm_lock_seq;
|
unsigned int vm_lock_seq;
|
||||||
/* Unstable RCU readers are allowed to read this. */
|
/* Unstable RCU readers are allowed to read this. */
|
||||||
struct vma_lock *vm_lock;
|
struct vma_lock *vm_lock;
|
||||||
#endif
|
#endif
|
||||||
|
@ -923,6 +923,9 @@ struct mm_struct {
|
||||||
* Roughly speaking, incrementing the sequence number is
|
* Roughly speaking, incrementing the sequence number is
|
||||||
* equivalent to releasing locks on VMAs; reading the sequence
|
* equivalent to releasing locks on VMAs; reading the sequence
|
||||||
* number can be part of taking a read lock on a VMA.
|
* number can be part of taking a read lock on a VMA.
|
||||||
|
* Incremented every time mmap_lock is write-locked/unlocked.
|
||||||
|
* Initialized to 0, therefore odd values indicate mmap_lock
|
||||||
|
* is write-locked and even values that it's released.
|
||||||
*
|
*
|
||||||
* Can be modified under write mmap_lock using RELEASE
|
* Can be modified under write mmap_lock using RELEASE
|
||||||
* semantics.
|
* semantics.
|
||||||
|
@ -931,7 +934,7 @@ struct mm_struct {
|
||||||
* Can be read with ACQUIRE semantics if not holding write
|
* Can be read with ACQUIRE semantics if not holding write
|
||||||
* mmap_lock.
|
* mmap_lock.
|
||||||
*/
|
*/
|
||||||
int mm_lock_seq;
|
seqcount_t mm_lock_seq;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -71,6 +71,62 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_PER_VMA_LOCK
|
#ifdef CONFIG_PER_VMA_LOCK
|
||||||
|
static inline void mm_lock_seqcount_init(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
seqcount_init(&mm->mm_lock_seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
do_raw_write_seqcount_begin(&mm->mm_lock_seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mm_lock_seqcount_end(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
|
||||||
|
do_raw_write_seqcount_end(&mm->mm_lock_seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
|
||||||
|
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
|
||||||
|
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline void mmap_init_lock(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
init_rwsem(&mm->mmap_lock);
|
||||||
|
mm_lock_seqcount_init(mm);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mmap_write_lock(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
__mmap_lock_trace_start_locking(mm, true);
|
||||||
|
down_write(&mm->mmap_lock);
|
||||||
|
mm_lock_seqcount_begin(mm);
|
||||||
|
__mmap_lock_trace_acquire_returned(mm, true, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
|
||||||
|
{
|
||||||
|
__mmap_lock_trace_start_locking(mm, true);
|
||||||
|
down_write_nested(&mm->mmap_lock, subclass);
|
||||||
|
mm_lock_seqcount_begin(mm);
|
||||||
|
__mmap_lock_trace_acquire_returned(mm, true, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int mmap_write_lock_killable(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
__mmap_lock_trace_start_locking(mm, true);
|
||||||
|
ret = down_write_killable(&mm->mmap_lock);
|
||||||
|
if (!ret)
|
||||||
|
mm_lock_seqcount_begin(mm);
|
||||||
|
__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Drop all currently-held per-VMA locks.
|
* Drop all currently-held per-VMA locks.
|
||||||
* This is called from the mmap_lock implementation directly before releasing
|
* This is called from the mmap_lock implementation directly before releasing
|
||||||
|
@ -82,46 +138,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
|
||||||
static inline void vma_end_write_all(struct mm_struct *mm)
|
static inline void vma_end_write_all(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
mmap_assert_write_locked(mm);
|
mmap_assert_write_locked(mm);
|
||||||
/*
|
mm_lock_seqcount_end(mm);
|
||||||
* Nobody can concurrently modify mm->mm_lock_seq due to exclusive
|
|
||||||
* mmap_lock being held.
|
|
||||||
* We need RELEASE semantics here to ensure that preceding stores into
|
|
||||||
* the VMA take effect before we unlock it with this store.
|
|
||||||
* Pairs with ACQUIRE semantics in vma_start_read().
|
|
||||||
*/
|
|
||||||
smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline void vma_end_write_all(struct mm_struct *mm) {}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline void mmap_init_lock(struct mm_struct *mm)
|
|
||||||
{
|
|
||||||
init_rwsem(&mm->mmap_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void mmap_write_lock(struct mm_struct *mm)
|
|
||||||
{
|
|
||||||
__mmap_lock_trace_start_locking(mm, true);
|
|
||||||
down_write(&mm->mmap_lock);
|
|
||||||
__mmap_lock_trace_acquire_returned(mm, true, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
|
|
||||||
{
|
|
||||||
__mmap_lock_trace_start_locking(mm, true);
|
|
||||||
down_write_nested(&mm->mmap_lock, subclass);
|
|
||||||
__mmap_lock_trace_acquire_returned(mm, true, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int mmap_write_lock_killable(struct mm_struct *mm)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
__mmap_lock_trace_start_locking(mm, true);
|
|
||||||
ret = down_write_killable(&mm->mmap_lock);
|
|
||||||
__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mmap_write_unlock(struct mm_struct *mm)
|
static inline void mmap_write_unlock(struct mm_struct *mm)
|
||||||
|
|
|
@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
init_rwsem(&vma->vm_lock->lock);
|
init_rwsem(&vma->vm_lock->lock);
|
||||||
vma->vm_lock_seq = -1;
|
vma->vm_lock_seq = UINT_MAX;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||||
seqcount_init(&mm->write_protect_seq);
|
seqcount_init(&mm->write_protect_seq);
|
||||||
mmap_init_lock(mm);
|
mmap_init_lock(mm);
|
||||||
INIT_LIST_HEAD(&mm->mmlist);
|
INIT_LIST_HEAD(&mm->mmlist);
|
||||||
#ifdef CONFIG_PER_VMA_LOCK
|
|
||||||
mm->mm_lock_seq = 0;
|
|
||||||
#endif
|
|
||||||
mm_pgtables_bytes_init(mm);
|
mm_pgtables_bytes_init(mm);
|
||||||
mm->map_count = 0;
|
mm->map_count = 0;
|
||||||
mm->locked_vm = 0;
|
mm->locked_vm = 0;
|
||||||
|
|
|
@ -40,7 +40,7 @@ struct mm_struct init_mm = {
|
||||||
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
|
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
|
||||||
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
||||||
#ifdef CONFIG_PER_VMA_LOCK
|
#ifdef CONFIG_PER_VMA_LOCK
|
||||||
.mm_lock_seq = 0,
|
.mm_lock_seq = SEQCNT_ZERO(init_mm.mm_lock_seq),
|
||||||
#endif
|
#endif
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
.cpu_bitmap = CPU_BITS_NONE,
|
.cpu_bitmap = CPU_BITS_NONE,
|
||||||
|
|
|
@ -100,7 +100,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
|
||||||
* begun. Linking to the tree will have caused this to be incremented,
|
* begun. Linking to the tree will have caused this to be incremented,
|
||||||
* which means we will get a false positive otherwise.
|
* which means we will get a false positive otherwise.
|
||||||
*/
|
*/
|
||||||
vma->vm_lock_seq = -1;
|
vma->vm_lock_seq = UINT_MAX;
|
||||||
|
|
||||||
return vma;
|
return vma;
|
||||||
}
|
}
|
||||||
|
@ -225,7 +225,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
|
||||||
int seq = vma->vm_lock_seq;
|
int seq = vma->vm_lock_seq;
|
||||||
|
|
||||||
/* We reset after each check. */
|
/* We reset after each check. */
|
||||||
vma->vm_lock_seq = -1;
|
vma->vm_lock_seq = UINT_MAX;
|
||||||
|
|
||||||
/* The vma_start_write() stub simply increments this value. */
|
/* The vma_start_write() stub simply increments this value. */
|
||||||
return seq > -1;
|
return seq > -1;
|
||||||
|
|
|
@ -281,7 +281,7 @@ struct vm_area_struct {
|
||||||
* counter reuse can only lead to occasional unnecessary use of the
|
* counter reuse can only lead to occasional unnecessary use of the
|
||||||
* slowpath.
|
* slowpath.
|
||||||
*/
|
*/
|
||||||
int vm_lock_seq;
|
unsigned int vm_lock_seq;
|
||||||
struct vma_lock *vm_lock;
|
struct vma_lock *vm_lock;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -467,7 +467,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
init_rwsem(&vma->vm_lock->lock);
|
init_rwsem(&vma->vm_lock->lock);
|
||||||
vma->vm_lock_seq = -1;
|
vma->vm_lock_seq = UINT_MAX;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue