From 22933152934f30de6f05b600c03f8a08f853a8d2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:59 -0800 Subject: [PATCH] memcg/sl[au]b: shrink dead caches This means that when we destroy a memcg cache that happened to be empty, those caches may take a lot of time to go away: removing the memcg reference won't destroy them - because there are pending references, and the empty pages will stay there, until a shrinker is called upon for any reason. In this patch, we will call kmem_cache_shrink() for all dead caches that cannot be destroyed because of remaining pages. After shrinking, it is possible that it could be freed. If this is not the case, we'll schedule a lazy worker to keep trying. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4b68ec2c8df..7633e0d429e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3080,7 +3080,27 @@ static void kmem_cache_destroy_work_func(struct work_struct *w) cachep = memcg_params_to_cache(p); - if (!atomic_read(&cachep->memcg_params->nr_pages)) + /* + * If we get down to 0 after shrink, we could delete right away. + * However, memcg_release_pages() already puts us back in the workqueue + * in that case. If we proceed deleting, we'll get a dangling + * reference, and removing the object from the workqueue in that case + * is unnecessary complication. We are not a fast path. + * + * Note that this case is fundamentally different from racing with + * shrink_slab(): if memcg_cgroup_destroy_cache() is called in + * kmem_cache_shrink, not only we would be reinserting a dead cache + * into the queue, but doing so from inside the worker racing to + * destroy it. + * + * So if we aren't down to zero, we'll just schedule a worker and try + * again + */ + if (atomic_read(&cachep->memcg_params->nr_pages) != 0) { + kmem_cache_shrink(cachep); + if (atomic_read(&cachep->memcg_params->nr_pages) == 0) + return; + } else kmem_cache_destroy(cachep); } @@ -3090,6 +3110,26 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) return; /* + * There are many ways in which we can get here. + * + * We can get to a memory-pressure situation while the delayed work is + * still pending to run. The vmscan shrinkers can then release all + * cache memory and get us to destruction. If this is the case, we'll + * be executed twice, which is a bug (the second time will execute over + * bogus data). In this case, cancelling the work should be fine. + * + * But we can also get here from the worker itself, if + * kmem_cache_shrink is enough to shake all the remaining objects and + * get the page count to 0. In this case, we'll deadlock if we try to + * cancel the work (the worker runs with an internal lock held, which + * is the same lock we would hold for cancel_work_sync().) + * + * Since we can't possibly know who got us here, just refrain from + * running if there is already work pending + */ + if (work_pending(&cachep->memcg_params->destroy)) + return; + /* * We have to defer the actual destroying to a workqueue, because * we might currently be in a context that cannot sleep. */ @@ -3217,7 +3257,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) * set, so flip it down to guarantee we are in control. */ c->memcg_params->dead = false; - cancel_delayed_work_sync(&c->memcg_params->destroy); + cancel_work_sync(&c->memcg_params->destroy); kmem_cache_destroy(c); } mutex_unlock(&set_limit_mutex); @@ -3242,7 +3282,7 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) cachep = memcg_params_to_cache(params); cachep->memcg_params->dead = true; INIT_WORK(&cachep->memcg_params->destroy, - kmem_cache_destroy_work_func); + kmem_cache_destroy_work_func); schedule_work(&cachep->memcg_params->destroy); } mutex_unlock(&memcg->slab_caches_mutex); -- 2.11.4.GIT