cgroup: Merge branch 'memcg_event' into for-3.14
Merge v3.12 based patch series to move cgroup_event implementation to memcg into for-3.14. The following two commits cause a conflict in kernel/cgroup.c:
2ff2a7d03b ("cgroup: kill css_id")
79bd9814e5 ("cgroup, memcg: move cgroup_event implementation to memcg")
Each patch removes a struct definition from kernel/cgroup.c. As the two are adjacent, they cause a context conflict. Easily resolved by removing both structs.
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
commit
edab95103d
7 changed files with 335 additions and 360 deletions
|
@ -24,7 +24,6 @@ CONTENTS:
|
||||||
2.1 Basic Usage
|
2.1 Basic Usage
|
||||||
2.2 Attaching processes
|
2.2 Attaching processes
|
||||||
2.3 Mounting hierarchies by name
|
2.3 Mounting hierarchies by name
|
||||||
2.4 Notification API
|
|
||||||
3. Kernel API
|
3. Kernel API
|
||||||
3.1 Overview
|
3.1 Overview
|
||||||
3.2 Synchronization
|
3.2 Synchronization
|
||||||
|
@ -472,25 +471,6 @@ you give a subsystem a name.
|
||||||
The name of the subsystem appears as part of the hierarchy description
|
The name of the subsystem appears as part of the hierarchy description
|
||||||
in /proc/mounts and /proc/<pid>/cgroups.
|
in /proc/mounts and /proc/<pid>/cgroups.
|
||||||
|
|
||||||
2.4 Notification API
|
|
||||||
--------------------
|
|
||||||
|
|
||||||
There is mechanism which allows to get notifications about changing
|
|
||||||
status of a cgroup.
|
|
||||||
|
|
||||||
To register a new notification handler you need to:
|
|
||||||
- create a file descriptor for event notification using eventfd(2);
|
|
||||||
- open a control file to be monitored (e.g. memory.usage_in_bytes);
|
|
||||||
- write "<event_fd> <control_fd> <args>" to cgroup.event_control.
|
|
||||||
Interpretation of args is defined by control file implementation;
|
|
||||||
|
|
||||||
eventfd will be woken up by control file implementation or when the
|
|
||||||
cgroup is removed.
|
|
||||||
|
|
||||||
To unregister a notification handler just close eventfd.
|
|
||||||
|
|
||||||
NOTE: Support of notifications should be implemented for the control
|
|
||||||
file. See documentation for the subsystem.
|
|
||||||
|
|
||||||
3. Kernel API
|
3. Kernel API
|
||||||
=============
|
=============
|
||||||
|
|
|
@ -29,7 +29,6 @@ struct cgroup_subsys;
|
||||||
struct inode;
|
struct inode;
|
||||||
struct cgroup;
|
struct cgroup;
|
||||||
struct css_id;
|
struct css_id;
|
||||||
struct eventfd_ctx;
|
|
||||||
|
|
||||||
extern int cgroup_init_early(void);
|
extern int cgroup_init_early(void);
|
||||||
extern int cgroup_init(void);
|
extern int cgroup_init(void);
|
||||||
|
@ -239,10 +238,6 @@ struct cgroup {
|
||||||
struct rcu_head rcu_head;
|
struct rcu_head rcu_head;
|
||||||
struct work_struct destroy_work;
|
struct work_struct destroy_work;
|
||||||
|
|
||||||
/* List of events which userspace want to receive */
|
|
||||||
struct list_head event_list;
|
|
||||||
spinlock_t event_list_lock;
|
|
||||||
|
|
||||||
/* directory xattrs */
|
/* directory xattrs */
|
||||||
struct simple_xattrs xattrs;
|
struct simple_xattrs xattrs;
|
||||||
};
|
};
|
||||||
|
@ -506,25 +501,6 @@ struct cftype {
|
||||||
int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
|
int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
|
||||||
|
|
||||||
int (*release)(struct inode *inode, struct file *file);
|
int (*release)(struct inode *inode, struct file *file);
|
||||||
|
|
||||||
/*
|
|
||||||
* register_event() callback will be used to add new userspace
|
|
||||||
* waiter for changes related to the cftype. Implement it if
|
|
||||||
* you want to provide this functionality. Use eventfd_signal()
|
|
||||||
* on eventfd to send notification to userspace.
|
|
||||||
*/
|
|
||||||
int (*register_event)(struct cgroup_subsys_state *css,
|
|
||||||
struct cftype *cft, struct eventfd_ctx *eventfd,
|
|
||||||
const char *args);
|
|
||||||
/*
|
|
||||||
* unregister_event() callback will be called when userspace
|
|
||||||
* closes the eventfd or on cgroup removing.
|
|
||||||
* This callback must be implemented, if you want provide
|
|
||||||
* notification functionality.
|
|
||||||
*/
|
|
||||||
void (*unregister_event)(struct cgroup_subsys_state *css,
|
|
||||||
struct cftype *cft,
|
|
||||||
struct eventfd_ctx *eventfd);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <linux/gfp.h>
|
#include <linux/gfp.h>
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#include <linux/cgroup.h>
|
#include <linux/cgroup.h>
|
||||||
|
#include <linux/eventfd.h>
|
||||||
|
|
||||||
struct vmpressure {
|
struct vmpressure {
|
||||||
unsigned long scanned;
|
unsigned long scanned;
|
||||||
|
@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr);
|
||||||
extern void vmpressure_cleanup(struct vmpressure *vmpr);
|
extern void vmpressure_cleanup(struct vmpressure *vmpr);
|
||||||
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
|
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
|
||||||
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
|
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
|
||||||
extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
|
extern int vmpressure_register_event(struct mem_cgroup *memcg,
|
||||||
extern int vmpressure_register_event(struct cgroup_subsys_state *css,
|
|
||||||
struct cftype *cft,
|
|
||||||
struct eventfd_ctx *eventfd,
|
struct eventfd_ctx *eventfd,
|
||||||
const char *args);
|
const char *args);
|
||||||
extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
|
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
|
||||||
struct cftype *cft,
|
|
||||||
struct eventfd_ctx *eventfd);
|
struct eventfd_ctx *eventfd);
|
||||||
#else
|
#else
|
||||||
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
|
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
|
||||||
|
|
|
@ -848,7 +848,6 @@ config NUMA_BALANCING
|
||||||
|
|
||||||
menuconfig CGROUPS
|
menuconfig CGROUPS
|
||||||
boolean "Control Group support"
|
boolean "Control Group support"
|
||||||
depends on EVENTFD
|
|
||||||
help
|
help
|
||||||
This option adds support for grouping sets of processes together, for
|
This option adds support for grouping sets of processes together, for
|
||||||
use with process control subsystems such as Cpusets, CFS, memory
|
use with process control subsystems such as Cpusets, CFS, memory
|
||||||
|
@ -915,6 +914,7 @@ config MEMCG
|
||||||
bool "Memory Resource Controller for Control Groups"
|
bool "Memory Resource Controller for Control Groups"
|
||||||
depends on RESOURCE_COUNTERS
|
depends on RESOURCE_COUNTERS
|
||||||
select MM_OWNER
|
select MM_OWNER
|
||||||
|
select EVENTFD
|
||||||
help
|
help
|
||||||
Provides a memory resource controller that manages both anonymous
|
Provides a memory resource controller that manages both anonymous
|
||||||
memory and page cache. (See Documentation/cgroups/memory.txt)
|
memory and page cache. (See Documentation/cgroups/memory.txt)
|
||||||
|
@ -1154,7 +1154,6 @@ config UIDGID_STRICT_TYPE_CHECKS
|
||||||
|
|
||||||
config SCHED_AUTOGROUP
|
config SCHED_AUTOGROUP
|
||||||
bool "Automatic process group scheduling"
|
bool "Automatic process group scheduling"
|
||||||
select EVENTFD
|
|
||||||
select CGROUPS
|
select CGROUPS
|
||||||
select CGROUP_SCHED
|
select CGROUP_SCHED
|
||||||
select FAIR_GROUP_SCHED
|
select FAIR_GROUP_SCHED
|
||||||
|
|
259
kernel/cgroup.c
259
kernel/cgroup.c
|
@ -56,11 +56,8 @@
|
||||||
#include <linux/pid_namespace.h>
|
#include <linux/pid_namespace.h>
|
||||||
#include <linux/idr.h>
|
#include <linux/idr.h>
|
||||||
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
|
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
|
||||||
#include <linux/eventfd.h>
|
|
||||||
#include <linux/poll.h>
|
|
||||||
#include <linux/flex_array.h> /* used in cgroup_attach_task */
|
#include <linux/flex_array.h> /* used in cgroup_attach_task */
|
||||||
#include <linux/kthread.h>
|
#include <linux/kthread.h>
|
||||||
#include <linux/file.h>
|
|
||||||
|
|
||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
|
|
||||||
|
@ -132,36 +129,6 @@ struct cfent {
|
||||||
struct simple_xattrs xattrs;
|
struct simple_xattrs xattrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* cgroup_event represents events which userspace want to receive.
|
|
||||||
*/
|
|
||||||
struct cgroup_event {
|
|
||||||
/*
|
|
||||||
* css which the event belongs to.
|
|
||||||
*/
|
|
||||||
struct cgroup_subsys_state *css;
|
|
||||||
/*
|
|
||||||
* Control file which the event associated.
|
|
||||||
*/
|
|
||||||
struct cftype *cft;
|
|
||||||
/*
|
|
||||||
* eventfd to signal userspace about the event.
|
|
||||||
*/
|
|
||||||
struct eventfd_ctx *eventfd;
|
|
||||||
/*
|
|
||||||
* Each of these stored in a list by the cgroup.
|
|
||||||
*/
|
|
||||||
struct list_head list;
|
|
||||||
/*
|
|
||||||
* All fields below needed to unregister event when
|
|
||||||
* userspace closes eventfd.
|
|
||||||
*/
|
|
||||||
poll_table pt;
|
|
||||||
wait_queue_head_t *wqh;
|
|
||||||
wait_queue_t wait;
|
|
||||||
struct work_struct remove;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* The list of hierarchy roots */
|
/* The list of hierarchy roots */
|
||||||
|
|
||||||
static LIST_HEAD(cgroup_roots);
|
static LIST_HEAD(cgroup_roots);
|
||||||
|
@ -1351,8 +1318,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
|
||||||
INIT_LIST_HEAD(&cgrp->pidlists);
|
INIT_LIST_HEAD(&cgrp->pidlists);
|
||||||
mutex_init(&cgrp->pidlist_mutex);
|
mutex_init(&cgrp->pidlist_mutex);
|
||||||
cgrp->dummy_css.cgroup = cgrp;
|
cgrp->dummy_css.cgroup = cgrp;
|
||||||
INIT_LIST_HEAD(&cgrp->event_list);
|
|
||||||
spin_lock_init(&cgrp->event_list_lock);
|
|
||||||
simple_xattrs_init(&cgrp->xattrs);
|
simple_xattrs_init(&cgrp->xattrs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2626,16 +2591,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
|
||||||
.removexattr = cgroup_removexattr,
|
.removexattr = cgroup_removexattr,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* Check if a file is a control file
|
|
||||||
*/
|
|
||||||
static inline struct cftype *__file_cft(struct file *file)
|
|
||||||
{
|
|
||||||
if (file_inode(file)->i_fop != &cgroup_file_operations)
|
|
||||||
return ERR_PTR(-EINVAL);
|
|
||||||
return __d_cft(file->f_dentry);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
|
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
|
||||||
struct super_block *sb)
|
struct super_block *sb)
|
||||||
{
|
{
|
||||||
|
@ -3915,202 +3870,6 @@ static void cgroup_dput(struct cgroup *cgrp)
|
||||||
deactivate_super(sb);
|
deactivate_super(sb);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Unregister event and free resources.
|
|
||||||
*
|
|
||||||
* Gets called from workqueue.
|
|
||||||
*/
|
|
||||||
static void cgroup_event_remove(struct work_struct *work)
|
|
||||||
{
|
|
||||||
struct cgroup_event *event = container_of(work, struct cgroup_event,
|
|
||||||
remove);
|
|
||||||
struct cgroup_subsys_state *css = event->css;
|
|
||||||
|
|
||||||
remove_wait_queue(event->wqh, &event->wait);
|
|
||||||
|
|
||||||
event->cft->unregister_event(css, event->cft, event->eventfd);
|
|
||||||
|
|
||||||
/* Notify userspace the event is going away. */
|
|
||||||
eventfd_signal(event->eventfd, 1);
|
|
||||||
|
|
||||||
eventfd_ctx_put(event->eventfd);
|
|
||||||
kfree(event);
|
|
||||||
css_put(css);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Gets called on POLLHUP on eventfd when user closes it.
|
|
||||||
*
|
|
||||||
* Called with wqh->lock held and interrupts disabled.
|
|
||||||
*/
|
|
||||||
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
|
|
||||||
int sync, void *key)
|
|
||||||
{
|
|
||||||
struct cgroup_event *event = container_of(wait,
|
|
||||||
struct cgroup_event, wait);
|
|
||||||
struct cgroup *cgrp = event->css->cgroup;
|
|
||||||
unsigned long flags = (unsigned long)key;
|
|
||||||
|
|
||||||
if (flags & POLLHUP) {
|
|
||||||
/*
|
|
||||||
* If the event has been detached at cgroup removal, we
|
|
||||||
* can simply return knowing the other side will cleanup
|
|
||||||
* for us.
|
|
||||||
*
|
|
||||||
* We can't race against event freeing since the other
|
|
||||||
* side will require wqh->lock via remove_wait_queue(),
|
|
||||||
* which we hold.
|
|
||||||
*/
|
|
||||||
spin_lock(&cgrp->event_list_lock);
|
|
||||||
if (!list_empty(&event->list)) {
|
|
||||||
list_del_init(&event->list);
|
|
||||||
/*
|
|
||||||
* We are in atomic context, but cgroup_event_remove()
|
|
||||||
* may sleep, so we have to call it in workqueue.
|
|
||||||
*/
|
|
||||||
schedule_work(&event->remove);
|
|
||||||
}
|
|
||||||
spin_unlock(&cgrp->event_list_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void cgroup_event_ptable_queue_proc(struct file *file,
|
|
||||||
wait_queue_head_t *wqh, poll_table *pt)
|
|
||||||
{
|
|
||||||
struct cgroup_event *event = container_of(pt,
|
|
||||||
struct cgroup_event, pt);
|
|
||||||
|
|
||||||
event->wqh = wqh;
|
|
||||||
add_wait_queue(wqh, &event->wait);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Parse input and register new cgroup event handler.
|
|
||||||
*
|
|
||||||
* Input must be in format '<event_fd> <control_fd> <args>'.
|
|
||||||
* Interpretation of args is defined by control file implementation.
|
|
||||||
*/
|
|
||||||
static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
|
|
||||||
struct cftype *cft, const char *buffer)
|
|
||||||
{
|
|
||||||
struct cgroup *cgrp = dummy_css->cgroup;
|
|
||||||
struct cgroup_event *event;
|
|
||||||
struct cgroup_subsys_state *cfile_css;
|
|
||||||
unsigned int efd, cfd;
|
|
||||||
struct fd efile;
|
|
||||||
struct fd cfile;
|
|
||||||
char *endp;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
efd = simple_strtoul(buffer, &endp, 10);
|
|
||||||
if (*endp != ' ')
|
|
||||||
return -EINVAL;
|
|
||||||
buffer = endp + 1;
|
|
||||||
|
|
||||||
cfd = simple_strtoul(buffer, &endp, 10);
|
|
||||||
if ((*endp != ' ') && (*endp != '\0'))
|
|
||||||
return -EINVAL;
|
|
||||||
buffer = endp + 1;
|
|
||||||
|
|
||||||
event = kzalloc(sizeof(*event), GFP_KERNEL);
|
|
||||||
if (!event)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
INIT_LIST_HEAD(&event->list);
|
|
||||||
init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
|
|
||||||
init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
|
|
||||||
INIT_WORK(&event->remove, cgroup_event_remove);
|
|
||||||
|
|
||||||
efile = fdget(efd);
|
|
||||||
if (!efile.file) {
|
|
||||||
ret = -EBADF;
|
|
||||||
goto out_kfree;
|
|
||||||
}
|
|
||||||
|
|
||||||
event->eventfd = eventfd_ctx_fileget(efile.file);
|
|
||||||
if (IS_ERR(event->eventfd)) {
|
|
||||||
ret = PTR_ERR(event->eventfd);
|
|
||||||
goto out_put_efile;
|
|
||||||
}
|
|
||||||
|
|
||||||
cfile = fdget(cfd);
|
|
||||||
if (!cfile.file) {
|
|
||||||
ret = -EBADF;
|
|
||||||
goto out_put_eventfd;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* the process need read permission on control file */
|
|
||||||
/* AV: shouldn't we check that it's been opened for read instead? */
|
|
||||||
ret = inode_permission(file_inode(cfile.file), MAY_READ);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out_put_cfile;
|
|
||||||
|
|
||||||
event->cft = __file_cft(cfile.file);
|
|
||||||
if (IS_ERR(event->cft)) {
|
|
||||||
ret = PTR_ERR(event->cft);
|
|
||||||
goto out_put_cfile;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!event->cft->ss) {
|
|
||||||
ret = -EBADF;
|
|
||||||
goto out_put_cfile;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Determine the css of @cfile, verify it belongs to the same
|
|
||||||
* cgroup as cgroup.event_control, and associate @event with it.
|
|
||||||
* Remaining events are automatically removed on cgroup destruction
|
|
||||||
* but the removal is asynchronous, so take an extra ref.
|
|
||||||
*/
|
|
||||||
rcu_read_lock();
|
|
||||||
|
|
||||||
ret = -EINVAL;
|
|
||||||
event->css = cgroup_css(cgrp, event->cft->ss);
|
|
||||||
cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
|
|
||||||
if (event->css && event->css == cfile_css && css_tryget(event->css))
|
|
||||||
ret = 0;
|
|
||||||
|
|
||||||
rcu_read_unlock();
|
|
||||||
if (ret)
|
|
||||||
goto out_put_cfile;
|
|
||||||
|
|
||||||
if (!event->cft->register_event || !event->cft->unregister_event) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out_put_css;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = event->cft->register_event(event->css, event->cft,
|
|
||||||
event->eventfd, buffer);
|
|
||||||
if (ret)
|
|
||||||
goto out_put_css;
|
|
||||||
|
|
||||||
efile.file->f_op->poll(efile.file, &event->pt);
|
|
||||||
|
|
||||||
spin_lock(&cgrp->event_list_lock);
|
|
||||||
list_add(&event->list, &cgrp->event_list);
|
|
||||||
spin_unlock(&cgrp->event_list_lock);
|
|
||||||
|
|
||||||
fdput(cfile);
|
|
||||||
fdput(efile);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
out_put_css:
|
|
||||||
css_put(event->css);
|
|
||||||
out_put_cfile:
|
|
||||||
fdput(cfile);
|
|
||||||
out_put_eventfd:
|
|
||||||
eventfd_ctx_put(event->eventfd);
|
|
||||||
out_put_efile:
|
|
||||||
fdput(efile);
|
|
||||||
out_kfree:
|
|
||||||
kfree(event);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
|
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
|
||||||
struct cftype *cft)
|
struct cftype *cft)
|
||||||
{
|
{
|
||||||
|
@ -4135,11 +3894,6 @@ static struct cftype cgroup_base_files[] = {
|
||||||
.release = cgroup_pidlist_release,
|
.release = cgroup_pidlist_release,
|
||||||
.mode = S_IRUGO | S_IWUSR,
|
.mode = S_IRUGO | S_IWUSR,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.name = "cgroup.event_control",
|
|
||||||
.write_string = cgroup_write_event_control,
|
|
||||||
.mode = S_IWUGO,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.name = "cgroup.clone_children",
|
.name = "cgroup.clone_children",
|
||||||
.flags = CFTYPE_INSANE,
|
.flags = CFTYPE_INSANE,
|
||||||
|
@ -4610,7 +4364,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
||||||
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
|
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
|
||||||
{
|
{
|
||||||
struct dentry *d = cgrp->dentry;
|
struct dentry *d = cgrp->dentry;
|
||||||
struct cgroup_event *event, *tmp;
|
|
||||||
struct cgroup_subsys *ss;
|
struct cgroup_subsys *ss;
|
||||||
struct cgroup *child;
|
struct cgroup *child;
|
||||||
bool empty;
|
bool empty;
|
||||||
|
@ -4685,18 +4438,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
||||||
dget(d);
|
dget(d);
|
||||||
cgroup_d_remove_dir(d);
|
cgroup_d_remove_dir(d);
|
||||||
|
|
||||||
/*
|
|
||||||
* Unregister events and notify userspace.
|
|
||||||
* Notify userspace about cgroup removing only after rmdir of cgroup
|
|
||||||
* directory to avoid race between userspace and kernelspace.
|
|
||||||
*/
|
|
||||||
spin_lock(&cgrp->event_list_lock);
|
|
||||||
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
|
|
||||||
list_del_init(&event->list);
|
|
||||||
schedule_work(&event->remove);
|
|
||||||
}
|
|
||||||
spin_unlock(&cgrp->event_list_lock);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
355
mm/memcontrol.c
355
mm/memcontrol.c
|
@ -45,6 +45,7 @@
|
||||||
#include <linux/swapops.h>
|
#include <linux/swapops.h>
|
||||||
#include <linux/spinlock.h>
|
#include <linux/spinlock.h>
|
||||||
#include <linux/eventfd.h>
|
#include <linux/eventfd.h>
|
||||||
|
#include <linux/poll.h>
|
||||||
#include <linux/sort.h>
|
#include <linux/sort.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/seq_file.h>
|
#include <linux/seq_file.h>
|
||||||
|
@ -55,6 +56,7 @@
|
||||||
#include <linux/cpu.h>
|
#include <linux/cpu.h>
|
||||||
#include <linux/oom.h>
|
#include <linux/oom.h>
|
||||||
#include <linux/lockdep.h>
|
#include <linux/lockdep.h>
|
||||||
|
#include <linux/file.h>
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
#include <net/ip.h>
|
#include <net/ip.h>
|
||||||
|
@ -227,6 +229,46 @@ struct mem_cgroup_eventfd_list {
|
||||||
struct eventfd_ctx *eventfd;
|
struct eventfd_ctx *eventfd;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cgroup_event represents events which userspace want to receive.
|
||||||
|
*/
|
||||||
|
struct mem_cgroup_event {
|
||||||
|
/*
|
||||||
|
* memcg which the event belongs to.
|
||||||
|
*/
|
||||||
|
struct mem_cgroup *memcg;
|
||||||
|
/*
|
||||||
|
* eventfd to signal userspace about the event.
|
||||||
|
*/
|
||||||
|
struct eventfd_ctx *eventfd;
|
||||||
|
/*
|
||||||
|
* Each of these stored in a list by the cgroup.
|
||||||
|
*/
|
||||||
|
struct list_head list;
|
||||||
|
/*
|
||||||
|
* register_event() callback will be used to add new userspace
|
||||||
|
* waiter for changes related to this event. Use eventfd_signal()
|
||||||
|
* on eventfd to send notification to userspace.
|
||||||
|
*/
|
||||||
|
int (*register_event)(struct mem_cgroup *memcg,
|
||||||
|
struct eventfd_ctx *eventfd, const char *args);
|
||||||
|
/*
|
||||||
|
* unregister_event() callback will be called when userspace closes
|
||||||
|
* the eventfd or on cgroup removing. This callback must be set,
|
||||||
|
* if you want provide notification functionality.
|
||||||
|
*/
|
||||||
|
void (*unregister_event)(struct mem_cgroup *memcg,
|
||||||
|
struct eventfd_ctx *eventfd);
|
||||||
|
/*
|
||||||
|
* All fields below needed to unregister event when
|
||||||
|
* userspace closes eventfd.
|
||||||
|
*/
|
||||||
|
poll_table pt;
|
||||||
|
wait_queue_head_t *wqh;
|
||||||
|
wait_queue_t wait;
|
||||||
|
struct work_struct remove;
|
||||||
|
};
|
||||||
|
|
||||||
static void mem_cgroup_threshold(struct mem_cgroup *memcg);
|
static void mem_cgroup_threshold(struct mem_cgroup *memcg);
|
||||||
static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
|
static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
|
||||||
|
|
||||||
|
@ -331,6 +373,10 @@ struct mem_cgroup {
|
||||||
atomic_t numainfo_updating;
|
atomic_t numainfo_updating;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* List of events which userspace want to receive */
|
||||||
|
struct list_head event_list;
|
||||||
|
spinlock_t event_list_lock;
|
||||||
|
|
||||||
struct mem_cgroup_per_node *nodeinfo[0];
|
struct mem_cgroup_per_node *nodeinfo[0];
|
||||||
/* WARNING: nodeinfo must be the last member here */
|
/* WARNING: nodeinfo must be the last member here */
|
||||||
};
|
};
|
||||||
|
@ -490,11 +536,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
|
||||||
return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
|
return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css)
|
|
||||||
{
|
|
||||||
return &mem_cgroup_from_css(css)->vmpressure;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
|
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
|
||||||
{
|
{
|
||||||
return (memcg == root_mem_cgroup);
|
return (memcg == root_mem_cgroup);
|
||||||
|
@ -5648,13 +5689,11 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
|
||||||
mem_cgroup_oom_notify_cb(iter);
|
mem_cgroup_oom_notify_cb(iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css,
|
static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
|
||||||
struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
|
struct eventfd_ctx *eventfd, const char *args, enum res_type type)
|
||||||
{
|
{
|
||||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
|
||||||
struct mem_cgroup_thresholds *thresholds;
|
struct mem_cgroup_thresholds *thresholds;
|
||||||
struct mem_cgroup_threshold_ary *new;
|
struct mem_cgroup_threshold_ary *new;
|
||||||
enum res_type type = MEMFILE_TYPE(cft->private);
|
|
||||||
u64 threshold, usage;
|
u64 threshold, usage;
|
||||||
int i, size, ret;
|
int i, size, ret;
|
||||||
|
|
||||||
|
@ -5731,13 +5770,23 @@ unlock:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css,
|
static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
|
||||||
struct cftype *cft, struct eventfd_ctx *eventfd)
|
struct eventfd_ctx *eventfd, const char *args)
|
||||||
|
{
|
||||||
|
return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg,
|
||||||
|
struct eventfd_ctx *eventfd, const char *args)
|
||||||
|
{
|
||||||
|
return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
|
||||||
|
struct eventfd_ctx *eventfd, enum res_type type)
|
||||||
{
|
{
|
||||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
|
||||||
struct mem_cgroup_thresholds *thresholds;
|
struct mem_cgroup_thresholds *thresholds;
|
||||||
struct mem_cgroup_threshold_ary *new;
|
struct mem_cgroup_threshold_ary *new;
|
||||||
enum res_type type = MEMFILE_TYPE(cft->private);
|
|
||||||
u64 usage;
|
u64 usage;
|
||||||
int i, j, size;
|
int i, j, size;
|
||||||
|
|
||||||
|
@ -5810,14 +5859,23 @@ unlock:
|
||||||
mutex_unlock(&memcg->thresholds_lock);
|
mutex_unlock(&memcg->thresholds_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css,
|
static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
|
||||||
struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
|
struct eventfd_ctx *eventfd)
|
||||||
{
|
{
|
||||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM);
|
||||||
struct mem_cgroup_eventfd_list *event;
|
}
|
||||||
enum res_type type = MEMFILE_TYPE(cft->private);
|
|
||||||
|
static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
|
||||||
|
struct eventfd_ctx *eventfd)
|
||||||
|
{
|
||||||
|
return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
|
||||||
|
struct eventfd_ctx *eventfd, const char *args)
|
||||||
|
{
|
||||||
|
struct mem_cgroup_eventfd_list *event;
|
||||||
|
|
||||||
BUG_ON(type != _OOM_TYPE);
|
|
||||||
event = kmalloc(sizeof(*event), GFP_KERNEL);
|
event = kmalloc(sizeof(*event), GFP_KERNEL);
|
||||||
if (!event)
|
if (!event)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
@ -5835,14 +5893,10 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css,
|
static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg,
|
||||||
struct cftype *cft, struct eventfd_ctx *eventfd)
|
struct eventfd_ctx *eventfd)
|
||||||
{
|
{
|
||||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
|
||||||
struct mem_cgroup_eventfd_list *ev, *tmp;
|
struct mem_cgroup_eventfd_list *ev, *tmp;
|
||||||
enum res_type type = MEMFILE_TYPE(cft->private);
|
|
||||||
|
|
||||||
BUG_ON(type != _OOM_TYPE);
|
|
||||||
|
|
||||||
spin_lock(&memcg_oom_lock);
|
spin_lock(&memcg_oom_lock);
|
||||||
|
|
||||||
|
@ -5959,13 +6013,233 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DO NOT USE IN NEW FILES.
|
||||||
|
*
|
||||||
|
* "cgroup.event_control" implementation.
|
||||||
|
*
|
||||||
|
* This is way over-engineered. It tries to support fully configurable
|
||||||
|
* events for each user. Such level of flexibility is completely
|
||||||
|
* unnecessary especially in the light of the planned unified hierarchy.
|
||||||
|
*
|
||||||
|
* Please deprecate this and replace with something simpler if at all
|
||||||
|
* possible.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unregister event and free resources.
|
||||||
|
*
|
||||||
|
* Gets called from workqueue.
|
||||||
|
*/
|
||||||
|
static void memcg_event_remove(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct mem_cgroup_event *event =
|
||||||
|
container_of(work, struct mem_cgroup_event, remove);
|
||||||
|
struct mem_cgroup *memcg = event->memcg;
|
||||||
|
|
||||||
|
remove_wait_queue(event->wqh, &event->wait);
|
||||||
|
|
||||||
|
event->unregister_event(memcg, event->eventfd);
|
||||||
|
|
||||||
|
/* Notify userspace the event is going away. */
|
||||||
|
eventfd_signal(event->eventfd, 1);
|
||||||
|
|
||||||
|
eventfd_ctx_put(event->eventfd);
|
||||||
|
kfree(event);
|
||||||
|
css_put(&memcg->css);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Gets called on POLLHUP on eventfd when user closes it.
|
||||||
|
*
|
||||||
|
* Called with wqh->lock held and interrupts disabled.
|
||||||
|
*/
|
||||||
|
static int memcg_event_wake(wait_queue_t *wait, unsigned mode,
|
||||||
|
int sync, void *key)
|
||||||
|
{
|
||||||
|
struct mem_cgroup_event *event =
|
||||||
|
container_of(wait, struct mem_cgroup_event, wait);
|
||||||
|
struct mem_cgroup *memcg = event->memcg;
|
||||||
|
unsigned long flags = (unsigned long)key;
|
||||||
|
|
||||||
|
if (flags & POLLHUP) {
|
||||||
|
/*
|
||||||
|
* If the event has been detached at cgroup removal, we
|
||||||
|
* can simply return knowing the other side will cleanup
|
||||||
|
* for us.
|
||||||
|
*
|
||||||
|
* We can't race against event freeing since the other
|
||||||
|
* side will require wqh->lock via remove_wait_queue(),
|
||||||
|
* which we hold.
|
||||||
|
*/
|
||||||
|
spin_lock(&memcg->event_list_lock);
|
||||||
|
if (!list_empty(&event->list)) {
|
||||||
|
list_del_init(&event->list);
|
||||||
|
/*
|
||||||
|
* We are in atomic context, but cgroup_event_remove()
|
||||||
|
* may sleep, so we have to call it in workqueue.
|
||||||
|
*/
|
||||||
|
schedule_work(&event->remove);
|
||||||
|
}
|
||||||
|
spin_unlock(&memcg->event_list_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void memcg_event_ptable_queue_proc(struct file *file,
|
||||||
|
wait_queue_head_t *wqh, poll_table *pt)
|
||||||
|
{
|
||||||
|
struct mem_cgroup_event *event =
|
||||||
|
container_of(pt, struct mem_cgroup_event, pt);
|
||||||
|
|
||||||
|
event->wqh = wqh;
|
||||||
|
add_wait_queue(wqh, &event->wait);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DO NOT USE IN NEW FILES.
|
||||||
|
*
|
||||||
|
* Parse input and register new cgroup event handler.
|
||||||
|
*
|
||||||
|
* Input must be in format '<event_fd> <control_fd> <args>'.
|
||||||
|
* Interpretation of args is defined by control file implementation.
|
||||||
|
*/
|
||||||
|
static int memcg_write_event_control(struct cgroup_subsys_state *css,
|
||||||
|
struct cftype *cft, const char *buffer)
|
||||||
|
{
|
||||||
|
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
||||||
|
struct mem_cgroup_event *event;
|
||||||
|
struct cgroup_subsys_state *cfile_css;
|
||||||
|
unsigned int efd, cfd;
|
||||||
|
struct fd efile;
|
||||||
|
struct fd cfile;
|
||||||
|
const char *name;
|
||||||
|
char *endp;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
efd = simple_strtoul(buffer, &endp, 10);
|
||||||
|
if (*endp != ' ')
|
||||||
|
return -EINVAL;
|
||||||
|
buffer = endp + 1;
|
||||||
|
|
||||||
|
cfd = simple_strtoul(buffer, &endp, 10);
|
||||||
|
if ((*endp != ' ') && (*endp != '\0'))
|
||||||
|
return -EINVAL;
|
||||||
|
buffer = endp + 1;
|
||||||
|
|
||||||
|
event = kzalloc(sizeof(*event), GFP_KERNEL);
|
||||||
|
if (!event)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
event->memcg = memcg;
|
||||||
|
INIT_LIST_HEAD(&event->list);
|
||||||
|
init_poll_funcptr(&event->pt, memcg_event_ptable_queue_proc);
|
||||||
|
init_waitqueue_func_entry(&event->wait, memcg_event_wake);
|
||||||
|
INIT_WORK(&event->remove, memcg_event_remove);
|
||||||
|
|
||||||
|
efile = fdget(efd);
|
||||||
|
if (!efile.file) {
|
||||||
|
ret = -EBADF;
|
||||||
|
goto out_kfree;
|
||||||
|
}
|
||||||
|
|
||||||
|
event->eventfd = eventfd_ctx_fileget(efile.file);
|
||||||
|
if (IS_ERR(event->eventfd)) {
|
||||||
|
ret = PTR_ERR(event->eventfd);
|
||||||
|
goto out_put_efile;
|
||||||
|
}
|
||||||
|
|
||||||
|
cfile = fdget(cfd);
|
||||||
|
if (!cfile.file) {
|
||||||
|
ret = -EBADF;
|
||||||
|
goto out_put_eventfd;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the process need read permission on control file */
|
||||||
|
/* AV: shouldn't we check that it's been opened for read instead? */
|
||||||
|
ret = inode_permission(file_inode(cfile.file), MAY_READ);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out_put_cfile;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine the event callbacks and set them in @event. This used
|
||||||
|
* to be done via struct cftype but cgroup core no longer knows
|
||||||
|
* about these events. The following is crude but the whole thing
|
||||||
|
* is for compatibility anyway.
|
||||||
|
*
|
||||||
|
* DO NOT ADD NEW FILES.
|
||||||
|
*/
|
||||||
|
name = cfile.file->f_dentry->d_name.name;
|
||||||
|
|
||||||
|
if (!strcmp(name, "memory.usage_in_bytes")) {
|
||||||
|
event->register_event = mem_cgroup_usage_register_event;
|
||||||
|
event->unregister_event = mem_cgroup_usage_unregister_event;
|
||||||
|
} else if (!strcmp(name, "memory.oom_control")) {
|
||||||
|
event->register_event = mem_cgroup_oom_register_event;
|
||||||
|
event->unregister_event = mem_cgroup_oom_unregister_event;
|
||||||
|
} else if (!strcmp(name, "memory.pressure_level")) {
|
||||||
|
event->register_event = vmpressure_register_event;
|
||||||
|
event->unregister_event = vmpressure_unregister_event;
|
||||||
|
} else if (!strcmp(name, "memory.memsw.usage_in_bytes")) {
|
||||||
|
event->register_event = memsw_cgroup_usage_register_event;
|
||||||
|
event->unregister_event = memsw_cgroup_usage_unregister_event;
|
||||||
|
} else {
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out_put_cfile;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Verify @cfile should belong to @css. Also, remaining events are
|
||||||
|
* automatically removed on cgroup destruction but the removal is
|
||||||
|
* asynchronous, so take an extra ref on @css.
|
||||||
|
*/
|
||||||
|
rcu_read_lock();
|
||||||
|
|
||||||
|
ret = -EINVAL;
|
||||||
|
cfile_css = css_from_dir(cfile.file->f_dentry->d_parent,
|
||||||
|
&mem_cgroup_subsys);
|
||||||
|
if (cfile_css == css && css_tryget(css))
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
rcu_read_unlock();
|
||||||
|
if (ret)
|
||||||
|
goto out_put_cfile;
|
||||||
|
|
||||||
|
ret = event->register_event(memcg, event->eventfd, buffer);
|
||||||
|
if (ret)
|
||||||
|
goto out_put_css;
|
||||||
|
|
||||||
|
efile.file->f_op->poll(efile.file, &event->pt);
|
||||||
|
|
||||||
|
spin_lock(&memcg->event_list_lock);
|
||||||
|
list_add(&event->list, &memcg->event_list);
|
||||||
|
spin_unlock(&memcg->event_list_lock);
|
||||||
|
|
||||||
|
fdput(cfile);
|
||||||
|
fdput(efile);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
out_put_css:
|
||||||
|
css_put(css);
|
||||||
|
out_put_cfile:
|
||||||
|
fdput(cfile);
|
||||||
|
out_put_eventfd:
|
||||||
|
eventfd_ctx_put(event->eventfd);
|
||||||
|
out_put_efile:
|
||||||
|
fdput(efile);
|
||||||
|
out_kfree:
|
||||||
|
kfree(event);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static struct cftype mem_cgroup_files[] = {
|
static struct cftype mem_cgroup_files[] = {
|
||||||
{
|
{
|
||||||
.name = "usage_in_bytes",
|
.name = "usage_in_bytes",
|
||||||
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
|
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
|
||||||
.read = mem_cgroup_read,
|
.read = mem_cgroup_read,
|
||||||
.register_event = mem_cgroup_usage_register_event,
|
|
||||||
.unregister_event = mem_cgroup_usage_unregister_event,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "max_usage_in_bytes",
|
.name = "max_usage_in_bytes",
|
||||||
|
@ -6005,6 +6279,12 @@ static struct cftype mem_cgroup_files[] = {
|
||||||
.write_u64 = mem_cgroup_hierarchy_write,
|
.write_u64 = mem_cgroup_hierarchy_write,
|
||||||
.read_u64 = mem_cgroup_hierarchy_read,
|
.read_u64 = mem_cgroup_hierarchy_read,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.name = "cgroup.event_control", /* XXX: for compat */
|
||||||
|
.write_string = memcg_write_event_control,
|
||||||
|
.flags = CFTYPE_NO_PREFIX,
|
||||||
|
.mode = S_IWUGO,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
.name = "swappiness",
|
.name = "swappiness",
|
||||||
.read_u64 = mem_cgroup_swappiness_read,
|
.read_u64 = mem_cgroup_swappiness_read,
|
||||||
|
@ -6019,14 +6299,10 @@ static struct cftype mem_cgroup_files[] = {
|
||||||
.name = "oom_control",
|
.name = "oom_control",
|
||||||
.read_map = mem_cgroup_oom_control_read,
|
.read_map = mem_cgroup_oom_control_read,
|
||||||
.write_u64 = mem_cgroup_oom_control_write,
|
.write_u64 = mem_cgroup_oom_control_write,
|
||||||
.register_event = mem_cgroup_oom_register_event,
|
|
||||||
.unregister_event = mem_cgroup_oom_unregister_event,
|
|
||||||
.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
|
.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "pressure_level",
|
.name = "pressure_level",
|
||||||
.register_event = vmpressure_register_event,
|
|
||||||
.unregister_event = vmpressure_unregister_event,
|
|
||||||
},
|
},
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
{
|
{
|
||||||
|
@ -6074,8 +6350,6 @@ static struct cftype memsw_cgroup_files[] = {
|
||||||
.name = "memsw.usage_in_bytes",
|
.name = "memsw.usage_in_bytes",
|
||||||
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
|
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
|
||||||
.read = mem_cgroup_read,
|
.read = mem_cgroup_read,
|
||||||
.register_event = mem_cgroup_usage_register_event,
|
|
||||||
.unregister_event = mem_cgroup_usage_unregister_event,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "memsw.max_usage_in_bytes",
|
.name = "memsw.max_usage_in_bytes",
|
||||||
|
@ -6265,6 +6539,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||||
mutex_init(&memcg->thresholds_lock);
|
mutex_init(&memcg->thresholds_lock);
|
||||||
spin_lock_init(&memcg->move_lock);
|
spin_lock_init(&memcg->move_lock);
|
||||||
vmpressure_init(&memcg->vmpressure);
|
vmpressure_init(&memcg->vmpressure);
|
||||||
|
INIT_LIST_HEAD(&memcg->event_list);
|
||||||
|
spin_lock_init(&memcg->event_list_lock);
|
||||||
|
|
||||||
return &memcg->css;
|
return &memcg->css;
|
||||||
|
|
||||||
|
@ -6340,6 +6616,19 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
|
||||||
static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
|
static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
|
||||||
{
|
{
|
||||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
||||||
|
struct mem_cgroup_event *event, *tmp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unregister events and notify userspace.
|
||||||
|
* Notify userspace about cgroup removing only after rmdir of cgroup
|
||||||
|
* directory to avoid race between userspace and kernelspace.
|
||||||
|
*/
|
||||||
|
spin_lock(&memcg->event_list_lock);
|
||||||
|
list_for_each_entry_safe(event, tmp, &memcg->event_list, list) {
|
||||||
|
list_del_init(&event->list);
|
||||||
|
schedule_work(&event->remove);
|
||||||
|
}
|
||||||
|
spin_unlock(&memcg->event_list_lock);
|
||||||
|
|
||||||
kmem_cgroup_css_offline(memcg);
|
kmem_cgroup_css_offline(memcg);
|
||||||
|
|
||||||
|
|
|
@ -278,8 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* vmpressure_register_event() - Bind vmpressure notifications to an eventfd
|
* vmpressure_register_event() - Bind vmpressure notifications to an eventfd
|
||||||
* @css: css that is interested in vmpressure notifications
|
* @memcg: memcg that is interested in vmpressure notifications
|
||||||
* @cft: cgroup control files handle
|
|
||||||
* @eventfd: eventfd context to link notifications with
|
* @eventfd: eventfd context to link notifications with
|
||||||
* @args: event arguments (used to set up a pressure level threshold)
|
* @args: event arguments (used to set up a pressure level threshold)
|
||||||
*
|
*
|
||||||
|
@ -289,15 +288,12 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
|
||||||
* threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
|
* threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
|
||||||
* "critical").
|
* "critical").
|
||||||
*
|
*
|
||||||
* This function should not be used directly, just pass it to (struct
|
* To be used as memcg event method.
|
||||||
* cftype).register_event, and then cgroup core will handle everything by
|
|
||||||
* itself.
|
|
||||||
*/
|
*/
|
||||||
int vmpressure_register_event(struct cgroup_subsys_state *css,
|
int vmpressure_register_event(struct mem_cgroup *memcg,
|
||||||
struct cftype *cft, struct eventfd_ctx *eventfd,
|
struct eventfd_ctx *eventfd, const char *args)
|
||||||
const char *args)
|
|
||||||
{
|
{
|
||||||
struct vmpressure *vmpr = css_to_vmpressure(css);
|
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
|
||||||
struct vmpressure_event *ev;
|
struct vmpressure_event *ev;
|
||||||
int level;
|
int level;
|
||||||
|
|
||||||
|
@ -325,23 +321,19 @@ int vmpressure_register_event(struct cgroup_subsys_state *css,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* vmpressure_unregister_event() - Unbind eventfd from vmpressure
|
* vmpressure_unregister_event() - Unbind eventfd from vmpressure
|
||||||
* @css: css handle
|
* @memcg: memcg handle
|
||||||
* @cft: cgroup control files handle
|
|
||||||
* @eventfd: eventfd context that was used to link vmpressure with the @cg
|
* @eventfd: eventfd context that was used to link vmpressure with the @cg
|
||||||
*
|
*
|
||||||
* This function does internal manipulations to detach the @eventfd from
|
* This function does internal manipulations to detach the @eventfd from
|
||||||
* the vmpressure notifications, and then frees internal resources
|
* the vmpressure notifications, and then frees internal resources
|
||||||
* associated with the @eventfd (but the @eventfd itself is not freed).
|
* associated with the @eventfd (but the @eventfd itself is not freed).
|
||||||
*
|
*
|
||||||
* This function should not be used directly, just pass it to (struct
|
* To be used as memcg event method.
|
||||||
* cftype).unregister_event, and then cgroup core will handle everything
|
|
||||||
* by itself.
|
|
||||||
*/
|
*/
|
||||||
void vmpressure_unregister_event(struct cgroup_subsys_state *css,
|
void vmpressure_unregister_event(struct mem_cgroup *memcg,
|
||||||
struct cftype *cft,
|
|
||||||
struct eventfd_ctx *eventfd)
|
struct eventfd_ctx *eventfd)
|
||||||
{
|
{
|
||||||
struct vmpressure *vmpr = css_to_vmpressure(css);
|
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
|
||||||
struct vmpressure_event *ev;
|
struct vmpressure_event *ev;
|
||||||
|
|
||||||
mutex_lock(&vmpr->events_lock);
|
mutex_lock(&vmpr->events_lock);
|
||||||
|
|
Loading…
Add table
Reference in a new issue