proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks
Let's support multiple registered callbacks, making sure that registering vmcore callbacks cannot fail. Make the callback return a bool instead of an int, handling how to deal with errors internally. Drop unused HAVE_OLDMEM_PFN_IS_RAM.

We soon want to make use of this infrastructure from other drivers: virtio-mem, registering one callback for each virtio-mem device, to prevent reading unplugged virtio-mem memory.

Handle it via a generic vmcore_cb structure, prepared for future extensions: for example, once we support virtio-mem on s390x where the vmcore is completely constructed in the second kernel, we want to detect and add plugged virtio-mem memory ranges to the vmcore in order for them to get dumped properly.

Handle corner cases that are unexpected and shouldn't happen in sane setups: registering a callback after the vmcore has already been opened (warn only) and unregistering a callback after the vmcore has already been opened (warn and essentially read only zeroes from that point on).

Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
2c9feeaedf
commit
cc5f2704c9
4 changed files with 110 additions and 37 deletions
|
@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn)
|
||||||
(pfn >= aperture_pfn_start + aperture_page_count));
|
(pfn >= aperture_pfn_start + aperture_page_count));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROC_VMCORE
|
||||||
|
static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
|
||||||
|
{
|
||||||
|
return !!gart_mem_pfn_is_ram(pfn);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct vmcore_cb gart_vmcore_cb = {
|
||||||
|
.pfn_is_ram = gart_oldmem_pfn_is_ram,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
static void __init exclude_from_core(u64 aper_base, u32 aper_order)
|
static void __init exclude_from_core(u64 aper_base, u32 aper_order)
|
||||||
{
|
{
|
||||||
aperture_pfn_start = aper_base >> PAGE_SHIFT;
|
aperture_pfn_start = aper_base >> PAGE_SHIFT;
|
||||||
aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
|
aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
|
||||||
#ifdef CONFIG_PROC_VMCORE
|
#ifdef CONFIG_PROC_VMCORE
|
||||||
WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
|
register_vmcore_cb(&gart_vmcore_cb);
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_PROC_KCORE
|
#ifdef CONFIG_PROC_KCORE
|
||||||
WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
|
WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
|
||||||
|
|
|
@ -12,10 +12,10 @@
|
||||||
* The kdump kernel has to check whether a pfn of the crashed kernel
|
* The kdump kernel has to check whether a pfn of the crashed kernel
|
||||||
* was a ballooned page. vmcore is using this function to decide
|
* was a ballooned page. vmcore is using this function to decide
|
||||||
* whether to access a pfn of the crashed kernel.
|
* whether to access a pfn of the crashed kernel.
|
||||||
* Returns 0 if the pfn is not backed by a RAM page, the caller may
|
* Returns "false" if the pfn is not backed by a RAM page, the caller may
|
||||||
* handle the pfn special in this case.
|
* handle the pfn special in this case.
|
||||||
*/
|
*/
|
||||||
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
|
static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
|
||||||
{
|
{
|
||||||
struct xen_hvm_get_mem_type a = {
|
struct xen_hvm_get_mem_type a = {
|
||||||
.domid = DOMID_SELF,
|
.domid = DOMID_SELF,
|
||||||
|
@ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn)
|
||||||
|
|
||||||
if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
|
if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
|
||||||
pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
|
pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
|
||||||
return -ENXIO;
|
return true;
|
||||||
}
|
}
|
||||||
return a.mem_type != HVMMEM_mmio_dm;
|
return a.mem_type != HVMMEM_mmio_dm;
|
||||||
}
|
}
|
||||||
|
static struct vmcore_cb xen_vmcore_cb = {
|
||||||
|
.pfn_is_ram = xen_vmcore_pfn_is_ram,
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void xen_hvm_exit_mmap(struct mm_struct *mm)
|
static void xen_hvm_exit_mmap(struct mm_struct *mm)
|
||||||
|
@ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void)
|
||||||
if (is_pagetable_dying_supported())
|
if (is_pagetable_dying_supported())
|
||||||
pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
|
pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
|
||||||
#ifdef CONFIG_PROC_VMCORE
|
#ifdef CONFIG_PROC_VMCORE
|
||||||
WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
|
register_vmcore_cb(&xen_vmcore_cb);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
|
||||||
/* Device Dump Size */
|
/* Device Dump Size */
|
||||||
static size_t vmcoredd_orig_sz;
|
static size_t vmcoredd_orig_sz;
|
||||||
|
|
||||||
/*
|
static DECLARE_RWSEM(vmcore_cb_rwsem);
|
||||||
* Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
|
/* List of registered vmcore callbacks. */
|
||||||
* The called function has to take care of module refcounting.
|
static LIST_HEAD(vmcore_cb_list);
|
||||||
|
/* Whether we had a surprise unregistration of a callback. */
|
||||||
|
static bool vmcore_cb_unstable;
|
||||||
|
/* Whether the vmcore has been opened once. */
|
||||||
|
static bool vmcore_opened;
|
||||||
|
|
||||||
|
void register_vmcore_cb(struct vmcore_cb *cb)
|
||||||
|
{
|
||||||
|
down_write(&vmcore_cb_rwsem);
|
||||||
|
INIT_LIST_HEAD(&cb->next);
|
||||||
|
list_add_tail(&cb->next, &vmcore_cb_list);
|
||||||
|
/*
|
||||||
|
* Registering a vmcore callback after the vmcore was opened is
|
||||||
|
* very unusual (e.g., manual driver loading).
|
||||||
*/
|
*/
|
||||||
static int (*oldmem_pfn_is_ram)(unsigned long pfn);
|
if (vmcore_opened)
|
||||||
|
pr_warn_once("Unexpected vmcore callback registration\n");
|
||||||
int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
|
up_write(&vmcore_cb_rwsem);
|
||||||
{
|
|
||||||
if (oldmem_pfn_is_ram)
|
|
||||||
return -EBUSY;
|
|
||||||
oldmem_pfn_is_ram = fn;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
|
EXPORT_SYMBOL_GPL(register_vmcore_cb);
|
||||||
|
|
||||||
void unregister_oldmem_pfn_is_ram(void)
|
void unregister_vmcore_cb(struct vmcore_cb *cb)
|
||||||
{
|
{
|
||||||
oldmem_pfn_is_ram = NULL;
|
down_write(&vmcore_cb_rwsem);
|
||||||
wmb();
|
list_del(&cb->next);
|
||||||
|
/*
|
||||||
|
* Unregistering a vmcore callback after the vmcore was opened is
|
||||||
|
* very unusual (e.g., forced driver removal), but we cannot stop
|
||||||
|
* unregistering.
|
||||||
|
*/
|
||||||
|
if (vmcore_opened) {
|
||||||
|
pr_warn_once("Unexpected vmcore callback unregistration\n");
|
||||||
|
vmcore_cb_unstable = true;
|
||||||
|
}
|
||||||
|
up_write(&vmcore_cb_rwsem);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
|
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
|
||||||
|
|
||||||
static bool pfn_is_ram(unsigned long pfn)
|
static bool pfn_is_ram(unsigned long pfn)
|
||||||
{
|
{
|
||||||
int (*fn)(unsigned long pfn);
|
struct vmcore_cb *cb;
|
||||||
/* pfn is ram unless fn() checks pagetype */
|
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
|
|
||||||
/*
|
lockdep_assert_held_read(&vmcore_cb_rwsem);
|
||||||
* Ask hypervisor if the pfn is really ram.
|
if (unlikely(vmcore_cb_unstable))
|
||||||
* A ballooned page contains no data and reading from such a page
|
return false;
|
||||||
* will cause high load in the hypervisor.
|
|
||||||
*/
|
list_for_each_entry(cb, &vmcore_cb_list, next) {
|
||||||
fn = oldmem_pfn_is_ram;
|
if (unlikely(!cb->pfn_is_ram))
|
||||||
if (fn)
|
continue;
|
||||||
ret = !!fn(pfn);
|
ret = cb->pfn_is_ram(cb, pfn);
|
||||||
|
if (!ret)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int open_vmcore(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
down_read(&vmcore_cb_rwsem);
|
||||||
|
vmcore_opened = true;
|
||||||
|
up_read(&vmcore_cb_rwsem);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Reads a page from the oldmem device from given offset. */
|
/* Reads a page from the oldmem device from given offset. */
|
||||||
ssize_t read_from_oldmem(char *buf, size_t count,
|
ssize_t read_from_oldmem(char *buf, size_t count,
|
||||||
u64 *ppos, int userbuf,
|
u64 *ppos, int userbuf,
|
||||||
|
@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
|
||||||
offset = (unsigned long)(*ppos % PAGE_SIZE);
|
offset = (unsigned long)(*ppos % PAGE_SIZE);
|
||||||
pfn = (unsigned long)(*ppos / PAGE_SIZE);
|
pfn = (unsigned long)(*ppos / PAGE_SIZE);
|
||||||
|
|
||||||
|
down_read(&vmcore_cb_rwsem);
|
||||||
do {
|
do {
|
||||||
if (count > (PAGE_SIZE - offset))
|
if (count > (PAGE_SIZE - offset))
|
||||||
nr_bytes = PAGE_SIZE - offset;
|
nr_bytes = PAGE_SIZE - offset;
|
||||||
|
@ -136,9 +166,11 @@ ssize_t read_from_oldmem(char *buf, size_t count,
|
||||||
tmp = copy_oldmem_page(pfn, buf, nr_bytes,
|
tmp = copy_oldmem_page(pfn, buf, nr_bytes,
|
||||||
offset, userbuf);
|
offset, userbuf);
|
||||||
|
|
||||||
if (tmp < 0)
|
if (tmp < 0) {
|
||||||
|
up_read(&vmcore_cb_rwsem);
|
||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
*ppos += nr_bytes;
|
*ppos += nr_bytes;
|
||||||
count -= nr_bytes;
|
count -= nr_bytes;
|
||||||
buf += nr_bytes;
|
buf += nr_bytes;
|
||||||
|
@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
|
||||||
offset = 0;
|
offset = 0;
|
||||||
} while (count);
|
} while (count);
|
||||||
|
|
||||||
|
up_read(&vmcore_cb_rwsem);
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
|
||||||
unsigned long from, unsigned long pfn,
|
unsigned long from, unsigned long pfn,
|
||||||
unsigned long size, pgprot_t prot)
|
unsigned long size, pgprot_t prot)
|
||||||
{
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if oldmem_pfn_is_ram was registered to avoid
|
* Check if a vmcore callback was registered to avoid
|
||||||
* looping over all pages without a reason.
|
* looping over all pages without a reason.
|
||||||
*/
|
*/
|
||||||
if (oldmem_pfn_is_ram)
|
down_read(&vmcore_cb_rwsem);
|
||||||
return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
|
if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
|
||||||
|
ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
|
||||||
else
|
else
|
||||||
return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
|
ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
|
||||||
|
up_read(&vmcore_cb_rwsem);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
|
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
|
||||||
|
@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const struct proc_ops vmcore_proc_ops = {
|
static const struct proc_ops vmcore_proc_ops = {
|
||||||
|
.proc_open = open_vmcore,
|
||||||
.proc_read = read_vmcore,
|
.proc_read = read_vmcore,
|
||||||
.proc_lseek = default_llseek,
|
.proc_lseek = default_llseek,
|
||||||
.proc_mmap = mmap_vmcore,
|
.proc_mmap = mmap_vmcore,
|
||||||
|
|
|
@ -91,9 +91,29 @@ static inline void vmcore_unusable(void)
|
||||||
elfcorehdr_addr = ELFCORE_ADDR_ERR;
|
elfcorehdr_addr = ELFCORE_ADDR_ERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HAVE_OLDMEM_PFN_IS_RAM 1
|
/**
|
||||||
extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn));
|
* struct vmcore_cb - driver callbacks for /proc/vmcore handling
|
||||||
extern void unregister_oldmem_pfn_is_ram(void);
|
* @pfn_is_ram: check whether a PFN really is RAM and should be accessed when
|
||||||
|
* reading the vmcore. Will return "true" if it is RAM or if the
|
||||||
|
* callback cannot tell. If any callback returns "false", it's not
|
||||||
|
* RAM and the page must not be accessed; zeroes should be
|
||||||
|
* indicated in the vmcore instead. For example, a ballooned page
|
||||||
|
* contains no data and reading from such a page will cause high
|
||||||
|
* load in the hypervisor.
|
||||||
|
* @next: List head to manage registered callbacks internally; initialized by
|
||||||
|
* register_vmcore_cb().
|
||||||
|
*
|
||||||
|
* vmcore callbacks allow drivers managing physical memory ranges to
|
||||||
|
* coordinate with vmcore handling code, for example, to prevent accessing
|
||||||
|
* physical memory ranges that should not be accessed when reading the vmcore,
|
||||||
|
* although included in the vmcore header as memory ranges to dump.
|
||||||
|
*/
|
||||||
|
struct vmcore_cb {
|
||||||
|
bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
|
||||||
|
struct list_head next;
|
||||||
|
};
|
||||||
|
extern void register_vmcore_cb(struct vmcore_cb *cb);
|
||||||
|
extern void unregister_vmcore_cb(struct vmcore_cb *cb);
|
||||||
|
|
||||||
#else /* !CONFIG_CRASH_DUMP */
|
#else /* !CONFIG_CRASH_DUMP */
|
||||||
static inline bool is_kdump_kernel(void) { return 0; }
|
static inline bool is_kdump_kernel(void) { return 0; }
|
||||||
|
|
Loading…
Add table
Reference in a new issue