mm: introduce mf_dax_kill_procs() for fsdax case
This new function is a variant of mf_generic_kill_procs() that accepts a (file, offset) pair instead of a struct, in order to support multiple files sharing a DAX mapping. It is intended to be called by file systems as part of the memory_failure handler, after the file system has performed a reverse mapping from the storage address to the file and file offset.

Link: https://lkml.kernel.org/r/20220603053738.1218681-6-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Goldwyn Rodrigues <rgoldwyn@suse.com>
Cc: Goldwyn Rodrigues <rgoldwyn@suse.de>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
2f437effc6
commit
c36e202495
2 changed files with 88 additions and 10 deletions
|
@ -3178,6 +3178,8 @@ enum mf_flags {
|
||||||
MF_UNPOISON = 1 << 4,
|
MF_UNPOISON = 1 << 4,
|
||||||
MF_SW_SIMULATED = 1 << 5,
|
MF_SW_SIMULATED = 1 << 5,
|
||||||
};
|
};
|
||||||
|
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
|
||||||
|
unsigned long count, int mf_flags);
|
||||||
extern int memory_failure(unsigned long pfn, int flags);
|
extern int memory_failure(unsigned long pfn, int flags);
|
||||||
extern void memory_failure_queue(unsigned long pfn, int flags);
|
extern void memory_failure_queue(unsigned long pfn, int flags);
|
||||||
extern void memory_failure_queue_kick(int cpu);
|
extern void memory_failure_queue_kick(int cpu);
|
||||||
|
|
|
@ -297,10 +297,9 @@ void shake_page(struct page *p)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(shake_page);
|
EXPORT_SYMBOL_GPL(shake_page);
|
||||||
|
|
||||||
static unsigned long dev_pagemap_mapping_shift(struct page *page,
|
static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
|
||||||
struct vm_area_struct *vma)
|
unsigned long address)
|
||||||
{
|
{
|
||||||
unsigned long address = vma_address(page, vma);
|
|
||||||
unsigned long ret = 0;
|
unsigned long ret = 0;
|
||||||
pgd_t *pgd;
|
pgd_t *pgd;
|
||||||
p4d_t *p4d;
|
p4d_t *p4d;
|
||||||
|
@ -340,9 +339,13 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
|
||||||
/*
|
/*
|
||||||
* Schedule a process for later kill.
|
* Schedule a process for later kill.
|
||||||
* Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
|
* Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
|
||||||
|
*
|
||||||
|
* Notice: @fsdax_pgoff is used only when @p is a fsdax page.
|
||||||
|
* In other cases, such as anonymous and file-backed pages, the address to be
|
||||||
|
* killed can be calculated from @p itself.
|
||||||
*/
|
*/
|
||||||
static void add_to_kill(struct task_struct *tsk, struct page *p,
|
static void add_to_kill(struct task_struct *tsk, struct page *p,
|
||||||
struct vm_area_struct *vma,
|
pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
|
||||||
struct list_head *to_kill)
|
struct list_head *to_kill)
|
||||||
{
|
{
|
||||||
struct to_kill *tk;
|
struct to_kill *tk;
|
||||||
|
@ -354,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
|
||||||
}
|
}
|
||||||
|
|
||||||
tk->addr = page_address_in_vma(p, vma);
|
tk->addr = page_address_in_vma(p, vma);
|
||||||
if (is_zone_device_page(p))
|
if (is_zone_device_page(p)) {
|
||||||
tk->size_shift = dev_pagemap_mapping_shift(p, vma);
|
/*
|
||||||
else
|
* Since page->mapping is not used for fsdax, we need
|
||||||
|
* to calculate the address based on the vma.
|
||||||
|
*/
|
||||||
|
if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
|
||||||
|
tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
|
||||||
|
tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
|
||||||
|
} else
|
||||||
tk->size_shift = page_shift(compound_head(p));
|
tk->size_shift = page_shift(compound_head(p));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -505,7 +514,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
|
||||||
if (!page_mapped_in_vma(page, vma))
|
if (!page_mapped_in_vma(page, vma))
|
||||||
continue;
|
continue;
|
||||||
if (vma->vm_mm == t->mm)
|
if (vma->vm_mm == t->mm)
|
||||||
add_to_kill(t, page, vma, to_kill);
|
add_to_kill(t, page, 0, vma, to_kill);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
read_unlock(&tasklist_lock);
|
read_unlock(&tasklist_lock);
|
||||||
|
@ -541,13 +550,41 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
|
||||||
* to be informed of all such data corruptions.
|
* to be informed of all such data corruptions.
|
||||||
*/
|
*/
|
||||||
if (vma->vm_mm == t->mm)
|
if (vma->vm_mm == t->mm)
|
||||||
add_to_kill(t, page, vma, to_kill);
|
add_to_kill(t, page, 0, vma, to_kill);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
read_unlock(&tasklist_lock);
|
read_unlock(&tasklist_lock);
|
||||||
i_mmap_unlock_read(mapping);
|
i_mmap_unlock_read(mapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_FS_DAX
|
||||||
|
/*
|
||||||
|
* Collect processes when the error hit a fsdax page.
|
||||||
|
*/
|
||||||
|
static void collect_procs_fsdax(struct page *page,
|
||||||
|
struct address_space *mapping, pgoff_t pgoff,
|
||||||
|
struct list_head *to_kill)
|
||||||
|
{
|
||||||
|
struct vm_area_struct *vma;
|
||||||
|
struct task_struct *tsk;
|
||||||
|
|
||||||
|
i_mmap_lock_read(mapping);
|
||||||
|
read_lock(&tasklist_lock);
|
||||||
|
for_each_process(tsk) {
|
||||||
|
struct task_struct *t = task_early_kill(tsk, true);
|
||||||
|
|
||||||
|
if (!t)
|
||||||
|
continue;
|
||||||
|
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||||
|
if (vma->vm_mm == t->mm)
|
||||||
|
add_to_kill(t, page, pgoff, vma, to_kill);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
read_unlock(&tasklist_lock);
|
||||||
|
i_mmap_unlock_read(mapping);
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_FS_DAX */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Collect the processes who have the corrupted page mapped to kill.
|
* Collect the processes who have the corrupted page mapped to kill.
|
||||||
*/
|
*/
|
||||||
|
@ -1588,6 +1625,45 @@ unlock:
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_FS_DAX
|
||||||
|
/**
|
||||||
|
* mf_dax_kill_procs - Collect and kill processes who are using this file range
|
||||||
|
* @mapping: address_space of the file in use
|
||||||
|
* @index: start pgoff of the range within the file
|
||||||
|
* @count: length of the range, in unit of PAGE_SIZE
|
||||||
|
* @mf_flags: memory failure flags
|
||||||
|
*/
|
||||||
|
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
|
||||||
|
unsigned long count, int mf_flags)
|
||||||
|
{
|
||||||
|
LIST_HEAD(to_kill);
|
||||||
|
dax_entry_t cookie;
|
||||||
|
struct page *page;
|
||||||
|
size_t end = index + count;
|
||||||
|
|
||||||
|
mf_flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
|
||||||
|
|
||||||
|
for (; index < end; index++) {
|
||||||
|
page = NULL;
|
||||||
|
cookie = dax_lock_mapping_entry(mapping, index, &page);
|
||||||
|
if (!cookie)
|
||||||
|
return -EBUSY;
|
||||||
|
if (!page)
|
||||||
|
goto unlock;
|
||||||
|
|
||||||
|
SetPageHWPoison(page);
|
||||||
|
|
||||||
|
collect_procs_fsdax(page, mapping, index, &to_kill);
|
||||||
|
unmap_and_kill(&to_kill, page_to_pfn(page), mapping,
|
||||||
|
index, mf_flags);
|
||||||
|
unlock:
|
||||||
|
dax_unlock_mapping_entry(mapping, index, cookie);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
|
||||||
|
#endif /* CONFIG_FS_DAX */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called from hugetlb code with hugetlb_lock held.
|
* Called from hugetlb code with hugetlb_lock held.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Add table
Reference in a new issue