memcg/sl[au]b: track all the memcg children of a kmem_cache

[deliverable/linux.git] / mm / slab_common.c
diff --git a/mm/slab_common.c b/mm/slab_common.c

index 2e4b4c6d89e2ef10c59b1ff1d90533da1c085372..080a43804bf1aa11fd50e9ee249416ed0b9c1f11 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -18,6 +18,7 @@
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
  #include <asm/page.h>
+#include <linux/memcontrol.h>
  
  #include "slab.h"
  
@@ -27,7 +28,8 @@ DEFINE_MUTEX(slab_mutex);
  struct kmem_cache *kmem_cache;
  
  #ifdef CONFIG_DEBUG_VM
-static int kmem_cache_sanity_check(const char *name, size_t size)
+static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
+                                  size_t size)
  {
         struct kmem_cache *s = NULL;
  
@@ -53,7 +55,13 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
                         continue;
                 }
  
-               if (!strcmp(s->name, name)) {
+               /*
+                * For simplicity, we won't check this in the list of memcg
+                * caches. We have control over memcg naming, and if there
+                * aren't duplicates in the global list, there won't be any
+                * duplicates in the memcg lists as well.
+                */
+               if (!memcg && !strcmp(s->name, name)) {
                         pr_err("%s (%s): Cache name already exists.\n",
                                __func__, name);
                         dump_stack();
@@ -66,12 +74,69 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
         return 0;
  }
  #else
-static inline int kmem_cache_sanity_check(const char *name, size_t size)
+static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
+                                         const char *name, size_t size)
  {
         return 0;
  }
  #endif
  
+#ifdef CONFIG_MEMCG_KMEM
+int memcg_update_all_caches(int num_memcgs)
+{
+       struct kmem_cache *s;
+       int ret = 0;    /* stays 0 unless a per-cache update fails */
+       mutex_lock(&slab_mutex);        /* held for the whole list walk */
+
+       list_for_each_entry(s, &slab_caches, list) {
+               if (!is_root_cache(s))
+                       continue;       /* non-root caches are skipped */
+
+               ret = memcg_update_cache_size(s, num_memcgs);
+               /*
+                * See comment in memcontrol.c, memcg_update_cache_size:
+                * Instead of freeing the memory, we'll just leave the caches
+                * up to this point in an updated state.
+                */
+               if (ret)
+                       goto out;       /* stop at the first failure */
+       }
+
+       memcg_update_array_size(num_memcgs);    /* skipped on failure */
+out:
+       mutex_unlock(&slab_mutex);
+       return ret;     /* 0, or the first non-zero update result */
+}
+#endif
+
+/*
+ * Figure out what the alignment of the objects will be given a set of
+ * flags, a user specified alignment and the size of the objects.
+ */
+unsigned long calculate_alignment(unsigned long flags,
+               unsigned long align, unsigned long size)
+{
+       /*
+        * With SLAB_HWCACHE_ALIGN, start from the cache line size and halve
+        * it while the object still fits in half of it; a greater user
+        * specified alignment still wins.
+        * NOTE(review): size == 0 would never leave the loop below; confirm
+        * that callers reject it.
+        */
+       if (flags & SLAB_HWCACHE_ALIGN) {
+               unsigned long ralign = cache_line_size();
+               while (size <= ralign / 2)
+                       ralign /= 2;
+               align = max(align, ralign);
+       }
+
+       if (align < ARCH_SLAB_MINALIGN) /* enforce the arch minimum */
+               align = ARCH_SLAB_MINALIGN;
+
+       return ALIGN(align, sizeof(void *));
+}
+
+
  /*
   * kmem_cache_create - Create a cache.
   * @name: A string which is used in /proc/slabinfo to identify this cache.
@@ -97,8 +162,9 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size)
   * as davem.
   */
  
-struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
-               unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
+                       size_t align, unsigned long flags, void (*ctor)(void *))
  {
         struct kmem_cache *s = NULL;
         int err = 0;
@@ -106,19 +172,33 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
         get_online_cpus();
         mutex_lock(&slab_mutex);
  
-       if (!kmem_cache_sanity_check(name, size) == 0)
+       if (!kmem_cache_sanity_check(memcg, name, size) == 0)
                 goto out_locked;
  
+       /*
+        * Some allocators will constrain the set of valid flags to a subset
+        * of all flags. We expect them to define CACHE_CREATE_MASK in this
+        * case, and we'll just provide them with a sanitized version of the
+        * passed flags.
+        */
+       flags &= CACHE_CREATE_MASK;
  
-       s = __kmem_cache_alias(name, size, align, flags, ctor);
+       s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
         if (s)
                 goto out_locked;
  
         s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
         if (s) {
                 s->object_size = s->size = size;
-               s->align = align;
+               s->align = calculate_alignment(flags, align, size);
                 s->ctor = ctor;
+
+               if (memcg_register_cache(memcg, s)) {
+                       kmem_cache_free(kmem_cache, s);
+                       err = -ENOMEM;
+                       goto out_locked;
+               }
+
                 s->name = kstrdup(name, GFP_KERNEL);
                 if (!s->name) {
                         kmem_cache_free(kmem_cache, s);
@@ -128,10 +208,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
  
                 err = __kmem_cache_create(s, flags);
                 if (!err) {
-
                         s->refcount = 1;
                         list_add(&s->list, &slab_caches);
-
+                       memcg_cache_list_add(memcg, s);
                 } else {
                         kfree(s->name);
                         kmem_cache_free(kmem_cache, s);
@@ -159,10 +238,20 @@ out_locked:
  
         return s;
  }
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t align,
+                 unsigned long flags, void (*ctor)(void *))
+{      /* Non-memcg entry point: same arguments, NULL memcg. */
+       return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor);
+}
  EXPORT_SYMBOL(kmem_cache_create);
  
  void kmem_cache_destroy(struct kmem_cache *s)
  {
+       /* Destroy all the children caches if we aren't a memcg cache */
+       kmem_cache_destroy_memcg_children(s);
+
         get_online_cpus();
         mutex_lock(&slab_mutex);
         s->refcount--;
@@ -174,6 +263,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
                         if (s->flags & SLAB_DESTROY_BY_RCU)
                                 rcu_barrier();
  
+                       memcg_release_cache(s);
                         kfree(s->name);
                         kmem_cache_free(kmem_cache, s);
                 } else {
@@ -195,7 +285,66 @@ int slab_is_available(void)
         return slab_state >= UP;
  }
  
+#ifndef CONFIG_SLOB
+/* Create a cache during boot when no slab services are available yet */
+void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
+               unsigned long flags)
+{
+       int err;
+
+       s->name = name;         /* pointer stored as-is, not duplicated */
+       s->size = s->object_size = size;
+       s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+       err = __kmem_cache_create(s, flags);
+
+       if (err)        /* size is a size_t, hence %zu rather than %zd */
+               panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
+                                       name, size, err);
+
+       s->refcount = -1;       /* Exempt from merging for now */
+}
+
+struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
+                               unsigned long flags)
+{
+       struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+
+       if (!s)         /* allocation failure is fatal here */
+               panic("Out of memory when creating slab %s\n", name);
+
+       create_boot_cache(s, name, size, flags);
+       list_add(&s->list, &slab_caches);
+       s->refcount = 1;        /* replaces the -1 set by create_boot_cache() */
+       return s;
+}
+
+#endif /* !CONFIG_SLOB */
+
+
  #ifdef CONFIG_SLABINFO
+static void print_slabinfo_header(struct seq_file *m)
+{
+       /*
+        * Output format version, so at least we can change it
+        * without _too_ many complaints.
+        */
+#ifdef CONFIG_DEBUG_SLAB
+       seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
+#else
+       seq_puts(m, "slabinfo - version: 2.1\n");
+#endif /* CONFIG_DEBUG_SLAB */
+       seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
+                "<objperslab> <pagesperslab>");
+       seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+       seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+#ifdef CONFIG_DEBUG_SLAB       /* extra columns for the statistics variant */
+       seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
+                "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
+       seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
+#endif /* CONFIG_DEBUG_SLAB */
+       seq_putc(m, '\n');      /* ends the single column-title line */
+}
+
  static void *s_start(struct seq_file *m, loff_t *pos)
  {
         loff_t n = *pos;
@@ -219,7 +368,23 @@ static void s_stop(struct seq_file *m, void *p)
  
  static int s_show(struct seq_file *m, void *p)
  {
-       return slabinfo_show(m, p);
+       struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+       struct slabinfo sinfo;
+
+       memset(&sinfo, 0, sizeof(sinfo));       /* start from all-zero fields */
+       get_slabinfo(s, &sinfo);
+
+       seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
+                  s->name, sinfo.active_objs, sinfo.num_objs, s->size,
+                  sinfo.objects_per_slab, (1 << sinfo.cache_order));
+
+       seq_printf(m, " : tunables %4u %4u %4u",
+                  sinfo.limit, sinfo.batchcount, sinfo.shared);
+       seq_printf(m, " : slabdata %6lu %6lu %6lu",
+                  sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
+       slabinfo_show_stats(m, s);      /* presumably the extra stat columns */
+       seq_putc(m, '\n');      /* one output line per cache */
+       return 0;
  }
  
  /*