From 79bd9814e5ec9a288d6599f53aeac0b548fdfe52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:42 -0500 Subject: cgroup, memcg: move cgroup_event implementation to memcg cgroup_event is way over-designed and tries to build a generic flexible event mechanism into cgroup - fully customizable event specification for each user of the interface. This is utterly unnecessary and overboard especially in the light of the planned unified hierarchy as there's gonna be a single agent. Simply generating events at fixed points, or if that's too restrictive, configurable cadence or single set of configurable points should be enough. Thankfully, memcg is the only user and gets to keep it. Replacing it with something simpler on sane_behavior is strongly recommended. This patch moves cgroup_event and "cgroup.event_control" implementation to mm/memcontrol.c. Clearing of events on cgroup destruction is moved from cgroup_destroy_locked() to mem_cgroup_css_offline(), which shouldn't make any noticeable difference. cgroup_css() and __file_cft() are exported to enable the move; however, this will soon be reverted once the event code is updated to be memcg specific. Note that "cgroup.event_control" will now exist only on the hierarchy with memcg attached to it. While this change is visible to userland, it is unlikely to be noticeable as the file has never been meaningful outside memcg. Aside from the above change, this is pure code relocation. v2: Per Li Zefan's comments, init/Kconfig updated accordingly and poll.h inclusion moved from cgroup.c to memcontrol.c. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. 
Shutemov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh --- mm/memcontrol.c | 248 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 248 insertions(+) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 13b9d0f221b8..02dae3292668 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ #include #include #include +#include #include "internal.h" #include #include @@ -226,6 +228,36 @@ struct mem_cgroup_eventfd_list { struct eventfd_ctx *eventfd; }; +/* + * cgroup_event represents events which userspace want to receive. + */ +struct cgroup_event { + /* + * css which the event belongs to. + */ + struct cgroup_subsys_state *css; + /* + * Control file which the event associated. + */ + struct cftype *cft; + /* + * eventfd to signal userspace about the event. + */ + struct eventfd_ctx *eventfd; + /* + * Each of these stored in a list by the cgroup. + */ + struct list_head list; + /* + * All fields below needed to unregister event when + * userspace closes eventfd. + */ + poll_table pt; + wait_queue_head_t *wqh; + wait_queue_t wait; + struct work_struct remove; +}; + static void mem_cgroup_threshold(struct mem_cgroup *memcg); static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); @@ -5947,6 +5979,202 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) } #endif +/* + * Unregister event and free resources. + * + * Gets called from workqueue. + */ +static void cgroup_event_remove(struct work_struct *work) +{ + struct cgroup_event *event = container_of(work, struct cgroup_event, + remove); + struct cgroup_subsys_state *css = event->css; + + remove_wait_queue(event->wqh, &event->wait); + + event->cft->unregister_event(css, event->cft, event->eventfd); + + /* Notify userspace the event is going away. 
*/ + eventfd_signal(event->eventfd, 1); + + eventfd_ctx_put(event->eventfd); + kfree(event); + css_put(css); +} + +/* + * Gets called on POLLHUP on eventfd when user closes it. + * + * Called with wqh->lock held and interrupts disabled. + */ +static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, + int sync, void *key) +{ + struct cgroup_event *event = container_of(wait, + struct cgroup_event, wait); + struct cgroup *cgrp = event->css->cgroup; + unsigned long flags = (unsigned long)key; + + if (flags & POLLHUP) { + /* + * If the event has been detached at cgroup removal, we + * can simply return knowing the other side will cleanup + * for us. + * + * We can't race against event freeing since the other + * side will require wqh->lock via remove_wait_queue(), + * which we hold. + */ + spin_lock(&cgrp->event_list_lock); + if (!list_empty(&event->list)) { + list_del_init(&event->list); + /* + * We are in atomic context, but cgroup_event_remove() + * may sleep, so we have to call it in workqueue. + */ + schedule_work(&event->remove); + } + spin_unlock(&cgrp->event_list_lock); + } + + return 0; +} + +static void cgroup_event_ptable_queue_proc(struct file *file, + wait_queue_head_t *wqh, poll_table *pt) +{ + struct cgroup_event *event = container_of(pt, + struct cgroup_event, pt); + + event->wqh = wqh; + add_wait_queue(wqh, &event->wait); +} + +/* + * Parse input and register new cgroup event handler. + * + * Input must be in format ' '. + * Interpretation of args is defined by control file implementation. 
+ */ +static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, + struct cftype *cft, const char *buffer) +{ + struct cgroup *cgrp = dummy_css->cgroup; + struct cgroup_event *event; + struct cgroup_subsys_state *cfile_css; + unsigned int efd, cfd; + struct fd efile; + struct fd cfile; + char *endp; + int ret; + + efd = simple_strtoul(buffer, &endp, 10); + if (*endp != ' ') + return -EINVAL; + buffer = endp + 1; + + cfd = simple_strtoul(buffer, &endp, 10); + if ((*endp != ' ') && (*endp != '\0')) + return -EINVAL; + buffer = endp + 1; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return -ENOMEM; + + INIT_LIST_HEAD(&event->list); + init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); + init_waitqueue_func_entry(&event->wait, cgroup_event_wake); + INIT_WORK(&event->remove, cgroup_event_remove); + + efile = fdget(efd); + if (!efile.file) { + ret = -EBADF; + goto out_kfree; + } + + event->eventfd = eventfd_ctx_fileget(efile.file); + if (IS_ERR(event->eventfd)) { + ret = PTR_ERR(event->eventfd); + goto out_put_efile; + } + + cfile = fdget(cfd); + if (!cfile.file) { + ret = -EBADF; + goto out_put_eventfd; + } + + /* the process need read permission on control file */ + /* AV: shouldn't we check that it's been opened for read instead? */ + ret = inode_permission(file_inode(cfile.file), MAY_READ); + if (ret < 0) + goto out_put_cfile; + + event->cft = __file_cft(cfile.file); + if (IS_ERR(event->cft)) { + ret = PTR_ERR(event->cft); + goto out_put_cfile; + } + + if (!event->cft->ss) { + ret = -EBADF; + goto out_put_cfile; + } + + /* + * Determine the css of @cfile, verify it belongs to the same + * cgroup as cgroup.event_control, and associate @event with it. + * Remaining events are automatically removed on cgroup destruction + * but the removal is asynchronous, so take an extra ref. 
+ */ + rcu_read_lock(); + + ret = -EINVAL; + event->css = cgroup_css(cgrp, event->cft->ss); + cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); + if (event->css && event->css == cfile_css && css_tryget(event->css)) + ret = 0; + + rcu_read_unlock(); + if (ret) + goto out_put_cfile; + + if (!event->cft->register_event || !event->cft->unregister_event) { + ret = -EINVAL; + goto out_put_css; + } + + ret = event->cft->register_event(event->css, event->cft, + event->eventfd, buffer); + if (ret) + goto out_put_css; + + efile.file->f_op->poll(efile.file, &event->pt); + + spin_lock(&cgrp->event_list_lock); + list_add(&event->list, &cgrp->event_list); + spin_unlock(&cgrp->event_list_lock); + + fdput(cfile); + fdput(efile); + + return 0; + +out_put_css: + css_put(event->css); +out_put_cfile: + fdput(cfile); +out_put_eventfd: + eventfd_ctx_put(event->eventfd); +out_put_efile: + fdput(efile); +out_kfree: + kfree(event); + + return ret; +} + static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -5993,6 +6221,12 @@ static struct cftype mem_cgroup_files[] = { .write_u64 = mem_cgroup_hierarchy_write, .read_u64 = mem_cgroup_hierarchy_read, }, + { + .name = "cgroup.event_control", + .write_string = cgroup_write_event_control, + .flags = CFTYPE_NO_PREFIX, + .mode = S_IWUGO, + }, { .name = "swappiness", .read_u64 = mem_cgroup_swappiness_read, @@ -6326,6 +6560,20 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct cgroup *cgrp = css->cgroup; + struct cgroup_event *event, *tmp; + + /* + * Unregister events and notify userspace. + * Notify userspace about cgroup removing only after rmdir of cgroup + * directory to avoid race between userspace and kernelspace. 
+ */ + spin_lock(&cgrp->event_list_lock); + list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { + list_del_init(&event->list); + schedule_work(&event->remove); + } + spin_unlock(&cgrp->event_list_lock); kmem_cgroup_css_offline(memcg); -- cgit v1.2.3 From b5557c4c3b1a38074d7001b87c2482eda3a0834a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:42 -0500 Subject: memcg: cgroup_write_event_control() now knows @css is for memcg @css for cgroup_write_event_control() is now always for memcg and the target file should be a memcg file too. Drop code which assumes @css is dummy_css and the target file may belong to different subsystems. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov --- mm/memcontrol.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 02dae3292668..d00368110b08 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6056,10 +6056,10 @@ static void cgroup_event_ptable_queue_proc(struct file *file, * Input must be in format ' '. * Interpretation of args is defined by control file implementation. 
*/ -static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, +static int cgroup_write_event_control(struct cgroup_subsys_state *css, struct cftype *cft, const char *buffer) { - struct cgroup *cgrp = dummy_css->cgroup; + struct cgroup *cgrp = css->cgroup; struct cgroup_event *event; struct cgroup_subsys_state *cfile_css; unsigned int efd, cfd; @@ -6082,6 +6082,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, if (!event) return -ENOMEM; + event->css = css; INIT_LIST_HEAD(&event->list); init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); init_waitqueue_func_entry(&event->wait, cgroup_event_wake); @@ -6117,23 +6118,17 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, goto out_put_cfile; } - if (!event->cft->ss) { - ret = -EBADF; - goto out_put_cfile; - } - /* - * Determine the css of @cfile, verify it belongs to the same - * cgroup as cgroup.event_control, and associate @event with it. - * Remaining events are automatically removed on cgroup destruction - * but the removal is asynchronous, so take an extra ref. + * Verify @cfile should belong to @css. Also, remaining events are + * automatically removed on cgroup destruction but the removal is + * asynchronous, so take an extra ref on @css. 
*/ rcu_read_lock(); ret = -EINVAL; - event->css = cgroup_css(cgrp, event->cft->ss); - cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); - if (event->css && event->css == cfile_css && css_tryget(event->css)) + cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, + &mem_cgroup_subsys); + if (cfile_css == css && css_tryget(css)) ret = 0; rcu_read_unlock(); @@ -6145,7 +6140,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, goto out_put_css; } - ret = event->cft->register_event(event->css, event->cft, + ret = event->cft->register_event(css, event->cft, event->eventfd, buffer); if (ret) goto out_put_css; @@ -6162,7 +6157,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, return 0; out_put_css: - css_put(event->css); + css_put(css); out_put_cfile: fdput(cfile); out_put_eventfd: -- cgit v1.2.3 From fba94807837850e211f8975e1970e23e7804ff4d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:43 -0500 Subject: cgroup, memcg: move cgroup->event_list[_lock] and event callbacks into memcg cgroup_event is being moved from cgroup core to memcg and the implementation is already moved by the previous patch. This patch moves the data fields and callbacks. * cgroup->event_list[_lock] are moved to mem_cgroup. * cftype->[un]register_event() are moved to cgroup_event. This makes it impossible for individual cftype definitions to specify their event callbacks. This is worked around by simply hard-coding filename to event callback mapping in cgroup_write_event_control(). This is awkward and inflexible, which is actually desirable given that we don't want to grow more usages of this feature. * eventfd_ctx declaration is removed from cgroup.h, which makes vmpressure.h miss eventfd_ctx declaration. Include eventfd.h from vmpressure.h. v2: Use file name from dentry instead of cftype. This will allow removing all cftype handling in the function. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh --- mm/memcontrol.c | 87 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 27 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d00368110b08..2fcacb18404b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -248,6 +248,22 @@ struct cgroup_event { * Each of these stored in a list by the cgroup. */ struct list_head list; + /* + * register_event() callback will be used to add new userspace + * waiter for changes related to this event. Use eventfd_signal() + * on eventfd to send notification to userspace. + */ + int (*register_event)(struct cgroup_subsys_state *css, + struct cftype *cft, struct eventfd_ctx *eventfd, + const char *args); + /* + * unregister_event() callback will be called when userspace closes + * the eventfd or on cgroup removing. This callback must be set, + * if you want provide notification functionality. + */ + void (*unregister_event)(struct cgroup_subsys_state *css, + struct cftype *cft, + struct eventfd_ctx *eventfd); /* * All fields below needed to unregister event when * userspace closes eventfd. @@ -362,6 +378,10 @@ struct mem_cgroup { atomic_t numainfo_updating; #endif + /* List of events which userspace want to receive */ + struct list_head event_list; + spinlock_t event_list_lock; + struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ }; @@ -5992,7 +6012,7 @@ static void cgroup_event_remove(struct work_struct *work) remove_wait_queue(event->wqh, &event->wait); - event->cft->unregister_event(css, event->cft, event->eventfd); + event->unregister_event(css, event->cft, event->eventfd); /* Notify userspace the event is going away. 
*/ eventfd_signal(event->eventfd, 1); @@ -6012,7 +6032,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, { struct cgroup_event *event = container_of(wait, struct cgroup_event, wait); - struct cgroup *cgrp = event->css->cgroup; + struct mem_cgroup *memcg = mem_cgroup_from_css(event->css); unsigned long flags = (unsigned long)key; if (flags & POLLHUP) { @@ -6025,7 +6045,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, * side will require wqh->lock via remove_wait_queue(), * which we hold. */ - spin_lock(&cgrp->event_list_lock); + spin_lock(&memcg->event_list_lock); if (!list_empty(&event->list)) { list_del_init(&event->list); /* @@ -6034,7 +6054,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, */ schedule_work(&event->remove); } - spin_unlock(&cgrp->event_list_lock); + spin_unlock(&memcg->event_list_lock); } return 0; @@ -6059,12 +6079,13 @@ static void cgroup_event_ptable_queue_proc(struct file *file, static int cgroup_write_event_control(struct cgroup_subsys_state *css, struct cftype *cft, const char *buffer) { - struct cgroup *cgrp = css->cgroup; + struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct cgroup_event *event; struct cgroup_subsys_state *cfile_css; unsigned int efd, cfd; struct fd efile; struct fd cfile; + const char *name; char *endp; int ret; @@ -6118,6 +6139,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, goto out_put_cfile; } + /* + * Determine the event callbacks and set them in @event. This used + * to be done via struct cftype but cgroup core no longer knows + * about these events. The following is crude but the whole thing + * is for compatibility anyway. 
+ */ + name = cfile.file->f_dentry->d_name.name; + + if (!strcmp(name, "memory.usage_in_bytes")) { + event->register_event = mem_cgroup_usage_register_event; + event->unregister_event = mem_cgroup_usage_unregister_event; + } else if (!strcmp(name, "memory.oom_control")) { + event->register_event = mem_cgroup_oom_register_event; + event->unregister_event = mem_cgroup_oom_unregister_event; + } else if (!strcmp(name, "memory.pressure_level")) { + event->register_event = vmpressure_register_event; + event->unregister_event = vmpressure_unregister_event; + } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { + event->register_event = mem_cgroup_usage_register_event; + event->unregister_event = mem_cgroup_usage_unregister_event; + } else { + ret = -EINVAL; + goto out_put_cfile; + } + /* * Verify @cfile should belong to @css. Also, remaining events are * automatically removed on cgroup destruction but the removal is @@ -6135,21 +6181,15 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, if (ret) goto out_put_cfile; - if (!event->cft->register_event || !event->cft->unregister_event) { - ret = -EINVAL; - goto out_put_css; - } - - ret = event->cft->register_event(css, event->cft, - event->eventfd, buffer); + ret = event->register_event(css, event->cft, event->eventfd, buffer); if (ret) goto out_put_css; efile.file->f_op->poll(efile.file, &event->pt); - spin_lock(&cgrp->event_list_lock); - list_add(&event->list, &cgrp->event_list); - spin_unlock(&cgrp->event_list_lock); + spin_lock(&memcg->event_list_lock); + list_add(&event->list, &memcg->event_list); + spin_unlock(&memcg->event_list_lock); fdput(cfile); fdput(efile); @@ -6175,8 +6215,6 @@ static struct cftype mem_cgroup_files[] = { .name = "usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), .read = mem_cgroup_read, - .register_event = mem_cgroup_usage_register_event, - .unregister_event = mem_cgroup_usage_unregister_event, }, { .name = "max_usage_in_bytes", @@ -6236,14 +6274,10 @@ 
static struct cftype mem_cgroup_files[] = { .name = "oom_control", .read_map = mem_cgroup_oom_control_read, .write_u64 = mem_cgroup_oom_control_write, - .register_event = mem_cgroup_oom_register_event, - .unregister_event = mem_cgroup_oom_unregister_event, .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), }, { .name = "pressure_level", - .register_event = vmpressure_register_event, - .unregister_event = vmpressure_unregister_event, }, #ifdef CONFIG_NUMA { @@ -6291,8 +6325,6 @@ static struct cftype memsw_cgroup_files[] = { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), .read = mem_cgroup_read, - .register_event = mem_cgroup_usage_register_event, - .unregister_event = mem_cgroup_usage_unregister_event, }, { .name = "memsw.max_usage_in_bytes", @@ -6483,6 +6515,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); vmpressure_init(&memcg->vmpressure); + INIT_LIST_HEAD(&memcg->event_list); + spin_lock_init(&memcg->event_list_lock); return &memcg->css; @@ -6555,7 +6589,6 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct cgroup *cgrp = css->cgroup; struct cgroup_event *event, *tmp; /* @@ -6563,12 +6596,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) * Notify userspace about cgroup removing only after rmdir of cgroup * directory to avoid race between userspace and kernelspace. 
+ */ - spin_lock(&cgrp->event_list_lock); - list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { + spin_lock(&memcg->event_list_lock); + list_for_each_entry_safe(event, tmp, &memcg->event_list, list) { list_del_init(&event->list); schedule_work(&event->remove); } - spin_unlock(&cgrp->event_list_lock); + spin_unlock(&memcg->event_list_lock); kmem_cgroup_css_offline(memcg); -- cgit v1.2.3 From 347c4a8747104a945ecced358944e42879176ca5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:43 -0500 Subject: memcg: remove cgroup_event->cft The only use of cgroup_event->cft is distinguishing "usage_in_bytes" and "memsw.usage_in_bytes" for mem_cgroup_usage_[un]register_event(), which can be done by adding an explicit argument to the function and implementing two wrappers so that the two cases can be distinguished from the function alone. Remove cgroup_event->cft and the related code including [un]register_events() methods. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko --- mm/memcontrol.c | 65 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 30 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2fcacb18404b..3c93dcfd26da 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -236,10 +236,6 @@ struct cgroup_event { * css which the event belongs to. */ struct cgroup_subsys_state *css; - /* - * Control file which the event associated. - */ - struct cftype *cft; /* * eventfd to signal userspace about the event. */ @@ -254,15 +250,13 @@ struct cgroup_event { * on eventfd to send notification to userspace. */ int (*register_event)(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd, - const char *args); + struct eventfd_ctx *eventfd, const char *args); /* * unregister_event() callback will be called when userspace closes * the eventfd or on cgroup removing. 
This callback must be set, * if you want provide notification functionality. */ void (*unregister_event)(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd); /* * All fields below needed to unregister event when @@ -5688,13 +5682,12 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) mem_cgroup_oom_notify_cb(iter); } -static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) +static int __mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, + struct eventfd_ctx *eventfd, const char *args, enum res_type type) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; - enum res_type type = MEMFILE_TYPE(cft->private); u64 threshold, usage; int i, size, ret; @@ -5771,13 +5764,24 @@ unlock: return ret; } -static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd) +static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, + struct eventfd_ctx *eventfd, const char *args) +{ + return __mem_cgroup_usage_register_event(css, eventfd, args, _MEM); +} + +static int memsw_cgroup_usage_register_event(struct cgroup_subsys_state *css, + struct eventfd_ctx *eventfd, const char *args) +{ + return __mem_cgroup_usage_register_event(css, eventfd, args, _MEMSWAP); +} + +static void __mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, + struct eventfd_ctx *eventfd, enum res_type type) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; - enum res_type type = MEMFILE_TYPE(cft->private); u64 usage; int i, j, size; @@ -5850,14 +5854,24 @@ unlock: mutex_unlock(&memcg->thresholds_lock); } +static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, + struct eventfd_ctx 
*eventfd) +{ + return __mem_cgroup_usage_unregister_event(css, eventfd, _MEM); +} + +static void memsw_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, + struct eventfd_ctx *eventfd) +{ + return __mem_cgroup_usage_unregister_event(css, eventfd, _MEMSWAP); +} + static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) + struct eventfd_ctx *eventfd, const char *args) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_eventfd_list *event; - enum res_type type = MEMFILE_TYPE(cft->private); - BUG_ON(type != _OOM_TYPE); event = kmalloc(sizeof(*event), GFP_KERNEL); if (!event) return -ENOMEM; @@ -5876,13 +5890,10 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, } static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd) + struct eventfd_ctx *eventfd) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_eventfd_list *ev, *tmp; - enum res_type type = MEMFILE_TYPE(cft->private); - - BUG_ON(type != _OOM_TYPE); spin_lock(&memcg_oom_lock); @@ -6012,7 +6023,7 @@ static void cgroup_event_remove(struct work_struct *work) remove_wait_queue(event->wqh, &event->wait); - event->unregister_event(css, event->cft, event->eventfd); + event->unregister_event(css, event->eventfd); /* Notify userspace the event is going away. */ eventfd_signal(event->eventfd, 1); @@ -6133,12 +6144,6 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, if (ret < 0) goto out_put_cfile; - event->cft = __file_cft(cfile.file); - if (IS_ERR(event->cft)) { - ret = PTR_ERR(event->cft); - goto out_put_cfile; - } - /* * Determine the event callbacks and set them in @event. 
This used * to be done via struct cftype but cgroup core no longer knows @@ -6157,8 +6162,8 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, event->register_event = vmpressure_register_event; event->unregister_event = vmpressure_unregister_event; } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { - event->register_event = mem_cgroup_usage_register_event; - event->unregister_event = mem_cgroup_usage_unregister_event; + event->register_event = memsw_cgroup_usage_register_event; + event->unregister_event = memsw_cgroup_usage_unregister_event; } else { ret = -EINVAL; goto out_put_cfile; @@ -6181,7 +6186,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, if (ret) goto out_put_cfile; - ret = event->register_event(css, event->cft, event->eventfd, buffer); + ret = event->register_event(css, event->eventfd, buffer); if (ret) goto out_put_css; -- cgit v1.2.3 From 59b6f87344ab5eb3057e5844b8cd8a39e668f477 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:43 -0500 Subject: memcg: make cgroup_event deal with mem_cgroup instead of cgroup_subsys_state cgroup_event is now memcg specific. Replace cgroup_event->css with ->memcg and convert [un]register_event() callbacks to take mem_cgroup pointer instead of cgroup_subsys_state one. This simplifies the code slightly and makes css_to_vmpressure() unnecessary which is removed. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko --- mm/memcontrol.c | 53 ++++++++++++++++++++++------------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3c93dcfd26da..42f2843af1a7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -233,9 +233,9 @@ struct mem_cgroup_eventfd_list { */ struct cgroup_event { /* - * css which the event belongs to. + * memcg which the event belongs to. 
*/ - struct cgroup_subsys_state *css; + struct mem_cgroup *memcg; /* * eventfd to signal userspace about the event. */ @@ -249,14 +249,14 @@ struct cgroup_event { * waiter for changes related to this event. Use eventfd_signal() * on eventfd to send notification to userspace. */ - int (*register_event)(struct cgroup_subsys_state *css, + int (*register_event)(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args); /* * unregister_event() callback will be called when userspace closes * the eventfd or on cgroup removing. This callback must be set, * if you want provide notification functionality. */ - void (*unregister_event)(struct cgroup_subsys_state *css, + void (*unregister_event)(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd); /* * All fields below needed to unregister event when @@ -535,11 +535,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; } -struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css) -{ - return &mem_cgroup_from_css(css)->vmpressure; -} - static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); @@ -5682,10 +5677,9 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) mem_cgroup_oom_notify_cb(iter); } -static int __mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, +static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args, enum res_type type) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; u64 threshold, usage; @@ -5764,22 +5758,21 @@ unlock: return ret; } -static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, +static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args) { - return __mem_cgroup_usage_register_event(css, eventfd, 
args, _MEM); + return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM); } -static int memsw_cgroup_usage_register_event(struct cgroup_subsys_state *css, +static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args) { - return __mem_cgroup_usage_register_event(css, eventfd, args, _MEMSWAP); + return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP); } -static void __mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, +static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, enum res_type type) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; u64 usage; @@ -5854,22 +5847,21 @@ unlock: mutex_unlock(&memcg->thresholds_lock); } -static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, +static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd) { - return __mem_cgroup_usage_unregister_event(css, eventfd, _MEM); + return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM); } -static void memsw_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, +static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd) { - return __mem_cgroup_usage_unregister_event(css, eventfd, _MEMSWAP); + return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP); } -static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, +static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_eventfd_list *event; event = kmalloc(sizeof(*event), GFP_KERNEL); @@ -5889,10 +5881,9 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, return 0; } -static void 
mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, +static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_eventfd_list *ev, *tmp; spin_lock(&memcg_oom_lock); @@ -6019,18 +6010,18 @@ static void cgroup_event_remove(struct work_struct *work) { struct cgroup_event *event = container_of(work, struct cgroup_event, remove); - struct cgroup_subsys_state *css = event->css; + struct mem_cgroup *memcg = event->memcg; remove_wait_queue(event->wqh, &event->wait); - event->unregister_event(css, event->eventfd); + event->unregister_event(memcg, event->eventfd); /* Notify userspace the event is going away. */ eventfd_signal(event->eventfd, 1); eventfd_ctx_put(event->eventfd); kfree(event); - css_put(css); + css_put(&memcg->css); } /* @@ -6043,7 +6034,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, { struct cgroup_event *event = container_of(wait, struct cgroup_event, wait); - struct mem_cgroup *memcg = mem_cgroup_from_css(event->css); + struct mem_cgroup *memcg = event->memcg; unsigned long flags = (unsigned long)key; if (flags & POLLHUP) { @@ -6114,7 +6105,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, if (!event) return -ENOMEM; - event->css = css; + event->memcg = memcg; INIT_LIST_HEAD(&event->list); init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); init_waitqueue_func_entry(&event->wait, cgroup_event_wake); @@ -6186,7 +6177,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, if (ret) goto out_put_cfile; - ret = event->register_event(css, event->eventfd, buffer); + ret = event->register_event(memcg, event->eventfd, buffer); if (ret) goto out_put_css; -- cgit v1.2.3 From 3bc942f372af383f49d56aab599469561a5e39ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:44 -0500 Subject: memcg: rename cgroup_event to mem_cgroup_event cgroup_event is 
only available in memcg now. Let's brand it that way. While at it, add a comment encouraging deprecation of the feature and remove the respective section from cgroup documentation. This patch is cosmetic. v3: Typo update as per Li Zefan. v2: Index in cgroups.txt updated accordingly as suggested by Li Zefan. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko --- mm/memcontrol.c | 57 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 20 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 42f2843af1a7..ec8582b3a232 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -231,7 +231,7 @@ struct mem_cgroup_eventfd_list { /* * cgroup_event represents events which userspace want to receive. */ -struct cgroup_event { +struct mem_cgroup_event { /* * memcg which the event belongs to. */ @@ -6001,15 +6001,28 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) } #endif +/* + * DO NOT USE IN NEW FILES. + * + * "cgroup.event_control" implementation. + * + * This is way over-engineered. It tries to support fully configurable + * events for each user. Such level of flexibility is completely + * unnecessary especially in the light of the planned unified hierarchy. + * + * Please deprecate this and replace with something simpler if at all + * possible. + */ + /* * Unregister event and free resources. * * Gets called from workqueue. */ -static void cgroup_event_remove(struct work_struct *work) +static void memcg_event_remove(struct work_struct *work) { - struct cgroup_event *event = container_of(work, struct cgroup_event, - remove); + struct mem_cgroup_event *event = + container_of(work, struct mem_cgroup_event, remove); struct mem_cgroup *memcg = event->memcg; remove_wait_queue(event->wqh, &event->wait); @@ -6029,11 +6042,11 @@ static void cgroup_event_remove(struct work_struct *work) * * Called with wqh->lock held and interrupts disabled. 
*/ -static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, - int sync, void *key) +static int memcg_event_wake(wait_queue_t *wait, unsigned mode, + int sync, void *key) { - struct cgroup_event *event = container_of(wait, - struct cgroup_event, wait); + struct mem_cgroup_event *event = + container_of(wait, struct mem_cgroup_event, wait); struct mem_cgroup *memcg = event->memcg; unsigned long flags = (unsigned long)key; @@ -6062,27 +6075,29 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, return 0; } -static void cgroup_event_ptable_queue_proc(struct file *file, +static void memcg_event_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { - struct cgroup_event *event = container_of(pt, - struct cgroup_event, pt); + struct mem_cgroup_event *event = + container_of(pt, struct mem_cgroup_event, pt); event->wqh = wqh; add_wait_queue(wqh, &event->wait); } /* + * DO NOT USE IN NEW FILES. + * * Parse input and register new cgroup event handler. * * Input must be in format '<event_fd> <control_fd> <args>'. * Interpretation of args is defined by control file implementation. 
*/ -static int cgroup_write_event_control(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buffer) +static int memcg_write_event_control(struct cgroup_subsys_state *css, + struct cftype *cft, const char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct cgroup_event *event; + struct mem_cgroup_event *event; struct cgroup_subsys_state *cfile_css; unsigned int efd, cfd; struct fd efile; @@ -6107,9 +6122,9 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, event->memcg = memcg; INIT_LIST_HEAD(&event->list); - init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); - init_waitqueue_func_entry(&event->wait, cgroup_event_wake); - INIT_WORK(&event->remove, cgroup_event_remove); + init_poll_funcptr(&event->pt, memcg_event_ptable_queue_proc); + init_waitqueue_func_entry(&event->wait, memcg_event_wake); + INIT_WORK(&event->remove, memcg_event_remove); efile = fdget(efd); if (!efile.file) { @@ -6140,6 +6155,8 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, * to be done via struct cftype but cgroup core no longer knows * about these events. The following is crude but the whole thing * is for compatibility anyway. + * + * DO NOT ADD NEW FILES. */ name = cfile.file->f_dentry->d_name.name; @@ -6251,8 +6268,8 @@ static struct cftype mem_cgroup_files[] = { .read_u64 = mem_cgroup_hierarchy_read, }, { - .name = "cgroup.event_control", - .write_string = cgroup_write_event_control, + .name = "cgroup.event_control", /* XXX: for compat */ + .write_string = memcg_write_event_control, .flags = CFTYPE_NO_PREFIX, .mode = S_IWUGO, }, @@ -6585,7 +6602,7 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct cgroup_event *event, *tmp; + struct mem_cgroup_event *event, *tmp; /* * Unregister events and notify userspace. -- cgit v1.2.3