When the system runs out of enclave memory, SGX can reclaim EPC pages
by swapping to normal RAM. These backing pages are allocated via a
per-enclave shared memory area. Since SGX allows unlimited overcommit
on EPC memory, the reclaimer thread can allocate a large number of
backing RAM pages in response to EPC memory pressure.

When the shared memory backing RAM allocation occurs in the reclaimer
thread context, the shared memory is charged to the root memory
control group, and the shmem usage of the enclave is not properly
accounted for, making cgroups ineffective at limiting the amount of
RAM an enclave can consume.

For example, when using a cgroup to launch a set of test enclaves,
the kernel does not properly account for 50%-75% of shmem page
allocations on average. In the worst case, when nearly all
allocations occur in the reclaimer thread, the kernel accounts less
than one percent of the shmem used by the enclave to the correct
cgroup.

SGX stores a list of mm_structs that are associated with an enclave.
Pick one of them during reclaim and charge that mm's memcg with the
shmem allocation. The one that gets picked is arbitrary, but this
list almost always has only one mm. The cases where there is more
than one mm with different memcgs are not worth handling.

Create a new function - sgx_encl_alloc_backing(). This function is
used whenever a new backing storage page needs to be allocated.
Previously, the same function was used both to allocate a page and to
retrieve a previously allocated page. Prior to backing page
allocation, if there is an mm_struct associated with the enclave
requesting the allocation, that mm's memcg is set as the active
memory control group.

[ dhansen: - fix merge conflict with ELDU fixes
           - check against actual ksgxd_tsk, not ->mm ]

Cc: stable@vger.kernel.org
Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Link: https://lkml.kernel.org/r/20220520174248.4918-1-kristen@linux.intel.com
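As an illustration of the approach described above, here is a minimal
sketch of what the allocation path can look like in encl.c. Only
sgx_encl_alloc_backing() itself is declared in the header below;
sgx_encl_get_mem_cgroup() and sgx_encl_get_backing() are assumed
internal helpers used for illustration. set_active_memcg(),
get_mem_cgroup_from_mm(), mem_cgroup_put() and mmput_async() are
existing kernel memcg/mm primitives.

/* Illustrative sketch, not the patch itself. */
#include <linux/memcontrol.h>
#include <linux/sched/mm.h>
#include "encl.h"

/* Assumed helper: find a memcg to charge backing pages to. */
static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
{
	struct mem_cgroup *memcg = NULL;
	struct sgx_encl_mm *encl_mm;
	int idx;

	/* In normal task context, charge the current task's mm. */
	if (!current_is_ksgxd())
		return get_mem_cgroup_from_mm(current->mm);

	/* In the reclaimer, pick an arbitrary mm from the enclave's list. */
	idx = srcu_read_lock(&encl->srcu);
	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
		if (!mmget_not_zero(encl_mm->mm))
			continue;
		memcg = get_mem_cgroup_from_mm(encl_mm->mm);
		mmput_async(encl_mm->mm);
		break;
	}
	srcu_read_unlock(&encl->srcu, idx);

	/* No mm on the list: fall back to the active (possibly root) memcg. */
	if (!memcg)
		return get_mem_cgroup_from_mm(NULL);

	return memcg;
}

int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
			   struct sgx_backing *backing)
{
	struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
	struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
	int ret;

	/* Shmem pages allocated in here are charged to encl_memcg. */
	ret = sgx_encl_get_backing(encl, page_index, backing); /* assumed helper */

	set_active_memcg(memcg);	/* restore the previous active memcg */
	mem_cgroup_put(encl_memcg);

	return ret;
}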
/* SPDX-License-Identifier: GPL-2.0 */
/**
 * Copyright(c) 2016-20 Intel Corporation.
 *
 * Contains the software defined data structures for enclaves.
 */
#ifndef _X86_ENCL_H
#define _X86_ENCL_H

#include <linux/cpumask.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/mm_types.h>
#include <linux/mmu_notifier.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/srcu.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>
#include "sgx.h"

/* 'desc' bits holding the offset in the VA (version array) page. */
#define SGX_ENCL_PAGE_VA_OFFSET_MASK	GENMASK_ULL(11, 3)

/* 'desc' bit marking that the page is being reclaimed. */
#define SGX_ENCL_PAGE_BEING_RECLAIMED	BIT(3)

struct sgx_encl_page {
	unsigned long desc;
	unsigned long vm_max_prot_bits;
	struct sgx_epc_page *epc_page;
	struct sgx_encl *encl;
	struct sgx_va_page *va_page;
};

enum sgx_encl_flags {
	SGX_ENCL_IOCTL		= BIT(0),
	SGX_ENCL_DEBUG		= BIT(1),
	SGX_ENCL_CREATED	= BIT(2),
	SGX_ENCL_INITIALIZED	= BIT(3),
};

/* Tracks one mm_struct that has the enclave mapped. */
struct sgx_encl_mm {
	struct sgx_encl *encl;
	struct mm_struct *mm;
	struct list_head list;
	struct mmu_notifier mmu_notifier;
};

struct sgx_encl {
	unsigned long base;
	unsigned long size;
	unsigned long flags;
	unsigned int page_cnt;
	unsigned int secs_child_cnt;
	struct mutex lock;
	struct xarray page_array;
	struct sgx_encl_page secs;
	unsigned long attributes;
	unsigned long attributes_mask;

	cpumask_t cpumask;
	struct file *backing;		/* shmem backing for reclaimed pages */
	struct kref refcount;
	struct list_head va_pages;
	unsigned long mm_list_version;
	struct list_head mm_list;	/* struct sgx_encl_mm entries */
	spinlock_t mm_lock;
	struct srcu_struct srcu;
};

#define SGX_VA_SLOT_COUNT 512

struct sgx_va_page {
	struct sgx_epc_page *epc_page;
	DECLARE_BITMAP(slots, SGX_VA_SLOT_COUNT);
	struct list_head list;
};

/* A backing (shmem) page for a reclaimed EPC page, plus its metadata. */
struct sgx_backing {
	pgoff_t page_index;		/* index into the backing store */
	struct page *contents;		/* encrypted page contents */
	struct page *pcmd;		/* Paging Crypto MetaData page */
	unsigned long pcmd_offset;	/* this page's PCMD within *pcmd */
};

extern const struct vm_operations_struct sgx_vm_ops;

static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
				struct vm_area_struct **vma)
{
	struct vm_area_struct *result;

	result = vma_lookup(mm, addr);
	if (!result || result->vm_ops != &sgx_vm_ops)
		return -EINVAL;

	*vma = result;

	return 0;
}

int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
		     unsigned long end, unsigned long vm_flags);

bool current_is_ksgxd(void);
void sgx_encl_release(struct kref *ref);
int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
			    struct sgx_backing *backing);
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
			   struct sgx_backing *backing);
void sgx_encl_put_backing(struct sgx_backing *backing);
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
				  struct sgx_encl_page *page);

struct sgx_epc_page *sgx_alloc_va_page(void);
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
bool sgx_va_page_full(struct sgx_va_page *va_page);
void sgx_encl_free_epc_page(struct sgx_epc_page *page);

#endif /* _X86_ENCL_H */
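For callers, the split between the two backing functions matters:
sgx_encl_alloc_backing() is for paths that may have to create the
backing page (such as writing a reclaimed page out), while
sgx_encl_lookup_backing() is for paths where the page must already
exist (such as loading a previously swapped page back into the EPC).
A hedged caller sketch follows; example_write_back() is a
hypothetical name, and the EWB step is elided.

/* Hypothetical caller: write a reclaimed EPC page to its shmem backing. */
static int example_write_back(struct sgx_encl *encl, unsigned long page_index)
{
	struct sgx_backing backing;
	int ret;

	/* May allocate the shmem page; charged to the enclave's memcg. */
	ret = sgx_encl_alloc_backing(encl, page_index, &backing);
	if (ret)
		return ret;

	/* ... EWB the page into backing.contents / backing.pcmd here ... */

	sgx_encl_put_backing(&backing);
	return 0;
}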