diff --git a/mm/vmscan.c b/mm/vmscan.c
index dd984470248fe3a137d211be0789f8aa8446f95a..142cb61f4822454bf3819a4c10c4e8b479a70ec8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -382,9 +382,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
  *
  * @memcg specifies the memory cgroup to target. If it is not NULL,
  * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
- * objects from the memory cgroup specified. Otherwise all shrinkers
- * are called, and memcg aware shrinkers are supposed to scan the
- * global list then.
+ * objects from the memory cgroup specified. Otherwise, only unaware
+ * shrinkers are called.
  *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned.  Page reclaim for example
@@ -404,7 +403,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
        struct shrinker *shrinker;
        unsigned long freed = 0;
 
-       if (memcg && !memcg_kmem_online(memcg))
+       if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)))
                return 0;
 
        if (nr_scanned == 0)
@@ -428,7 +427,13 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
                        .memcg = memcg,
                };
 
-               if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+               /*
+                * If kernel memory accounting is disabled, we ignore
+                * SHRINKER_MEMCG_AWARE flag and call all shrinkers
+                * passing NULL for memcg.
+                */
+               if (memcg_kmem_enabled() &&
+                   !!memcg != !!(shrinker->flags & SHRINKER_MEMCG_AWARE))
                        continue;
 
                if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
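
[Annotation] The two shrink_slab() hunks above change the memcg contract: with kernel memory accounting enabled, a NULL @memcg pass now runs only the memcg-unaware shrinkers, while memcg-aware ones are expected to be covered by per-cgroup passes; with accounting disabled there are no per-memcg passes at all, so every shrinker runs in the global pass. The !!memcg != !!(shrinker->flags & SHRINKER_MEMCG_AWARE) test is dense, so here is a standalone restatement of the rule as I read it; should_run_shrinker() and its bool parameters are illustrative only and do not exist in the kernel:

	#include <stdbool.h>

	/* Illustrative model of the shrinker-selection rule in the loop above. */
	static bool should_run_shrinker(bool kmem_enabled, bool have_memcg,
					bool memcg_aware)
	{
		/*
		 * Without kmem accounting, the early return at the top of
		 * shrink_slab() already filters out per-memcg calls, so every
		 * shrinker participates in the remaining (global) pass.
		 */
		if (!kmem_enabled)
			return true;

		/*
		 * With kmem accounting, memcg-aware shrinkers run only in
		 * per-memcg passes and unaware shrinkers only in the global
		 * (have_memcg == false) pass.
		 */
		return have_memcg == memcg_aware;
	}
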
@@ -633,11 +638,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
         * Note that if SetPageDirty is always performed via set_page_dirty,
         * and thus under tree_lock, then this ordering is not required.
         */
-       if (!page_freeze_refs(page, 2))
+       if (!page_ref_freeze(page, 2))
                goto cannot_free;
        /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
        if (unlikely(PageDirty(page))) {
-               page_unfreeze_refs(page, 2);
+               page_ref_unfreeze(page, 2);
                goto cannot_free;
        }
 
@@ -699,7 +704,7 @@ int remove_mapping(struct address_space *mapping, struct page *page)
                 * drops the pagecache ref for us without requiring another
                 * atomic operation.
                 */
-               page_unfreeze_refs(page, 1);
+               page_ref_unfreeze(page, 1);
                return 1;
        }
        return 0;
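
[Annotation] The page_freeze_refs()/page_unfreeze_refs() calls above are renamed to page_ref_freeze()/page_ref_unfreeze() with unchanged semantics: freezing succeeds only when exactly the expected number of references is held, and it atomically drops the count to zero so no new reference can be taken while the page is removed from its mapping. A rough userspace model of that pattern with C11 atomics (the kernel helpers operate on the page reference count and are not literally this code):

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	/* Succeed only if exactly `expected` references exist; drop them to 0. */
	static bool ref_freeze(atomic_int *refs, int expected)
	{
		int old = expected;
		return atomic_compare_exchange_strong(refs, &old, 0);
	}

	/* Undo a freeze by handing `count` references back. */
	static void ref_unfreeze(atomic_int *refs, int count)
	{
		assert(atomic_load(refs) == 0);
		atomic_store(refs, count);
	}

In the hunks above, __remove_mapping() freezes at 2 and backs out to 2 if the page turns out to be dirty, while remove_mapping() unfreezes to 1 so the caller inherits the final reference, as the surrounding comment explains.
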
@@ -2548,7 +2553,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                sc->gfp_mask |= __GFP_HIGHMEM;
 
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                                       requested_highidx, sc->nodemask) {
+                                       gfp_zone(sc->gfp_mask), sc->nodemask) {
                enum zone_type classzone_idx;
 
                if (!populated_zone(zone))
@@ -2968,18 +2973,23 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
        } while (memcg);
 }
 
-static bool zone_balanced(struct zone *zone, int order,
-                         unsigned long balance_gap, int classzone_idx)
+static bool zone_balanced(struct zone *zone, int order, bool highorder,
+                       unsigned long balance_gap, int classzone_idx)
 {
-       if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
-                                   balance_gap, classzone_idx))
-               return false;
+       unsigned long mark = high_wmark_pages(zone) + balance_gap;
 
-       if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
-                               order, 0, classzone_idx) == COMPACT_SKIPPED)
-               return false;
+       /*
+        * When checking from pgdat_balanced(), kswapd should stop and sleep
+        * when it reaches the high order-0 watermark and let kcompactd take
+        * over. Other callers such as wakeup_kswapd() want to determine the
+        * true high-order watermark.
+        */
+       if (IS_ENABLED(CONFIG_COMPACTION) && !highorder) {
+               mark += (1UL << order);
+               order = 0;
+       }
 
-       return true;
+       return zone_watermark_ok_safe(zone, order, mark, classzone_idx);
 }
 
 /*
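
[Annotation] The new highorder flag splits zone_balanced() into two modes. Callers such as wakeup_kswapd() pass true and still test the real high-order watermark; pgdat_balanced() and the kswapd reclaim path pass false, so an order-3 request, for example, is judged as an order-0 check against high_wmark + balance_gap + 8 extra pages, and assembling the contiguous block is left to kcompactd. A toy model of the adjustment (zone_balanced_model() is illustrative; the real check goes through zone_watermark_ok_safe(), which also inspects per-order free lists):

	#include <stdbool.h>

	static bool zone_balanced_model(unsigned long free_pages,
					unsigned long high_wmark,
					unsigned long balance_gap,
					int order, bool highorder,
					bool have_compaction)
	{
		unsigned long mark = high_wmark + balance_gap;

		if (have_compaction && !highorder) {
			mark += 1UL << order;	/* order 3 -> 8 extra pages */
			order = 0;		/* judge as an order-0 request */
		}

		/* crude stand-in for zone_watermark_ok_safe() */
		return free_pages >= mark;
	}
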
@@ -3029,7 +3039,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
                        continue;
                }
 
-               if (zone_balanced(zone, order, 0, i))
+               if (zone_balanced(zone, order, false, 0, i))
                        balanced_pages += zone->managed_pages;
                else if (!order)
                        return false;
@@ -3083,27 +3093,14 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
  */
 static bool kswapd_shrink_zone(struct zone *zone,
                               int classzone_idx,
-                              struct scan_control *sc,
-                              unsigned long *nr_attempted)
+                              struct scan_control *sc)
 {
-       int testorder = sc->order;
        unsigned long balance_gap;
        bool lowmem_pressure;
 
        /* Reclaim above the high watermark. */
        sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone));
 
-       /*
-        * Kswapd reclaims only single pages with compaction enabled. Trying
-        * too hard to reclaim until contiguous free pages have become
-        * available can hurt performance by evicting too much useful data
-        * from memory. Do not reclaim more than needed for compaction.
-        */
-       if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
-                       compaction_suitable(zone, sc->order, 0, classzone_idx)
-                                                       != COMPACT_SKIPPED)
-               testorder = 0;
-
        /*
         * We put equal pressure on every zone, unless one zone has way too
         * many pages free already. The "too many pages" is defined as the
@@ -3118,15 +3115,12 @@ static bool kswapd_shrink_zone(struct zone *zone,
         * reclaim is necessary
         */
        lowmem_pressure = (buffer_heads_over_limit && is_highmem(zone));
-       if (!lowmem_pressure && zone_balanced(zone, testorder,
+       if (!lowmem_pressure && zone_balanced(zone, sc->order, false,
                                                balance_gap, classzone_idx))
                return true;
 
        shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
 
-       /* Account for the number of pages attempted to reclaim */
-       *nr_attempted += sc->nr_to_reclaim;
-
        clear_bit(ZONE_WRITEBACK, &zone->flags);
 
        /*
@@ -3136,7 +3130,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
         * waits.
         */
        if (zone_reclaimable(zone) &&
-           zone_balanced(zone, testorder, 0, classzone_idx)) {
+           zone_balanced(zone, sc->order, false, 0, classzone_idx)) {
                clear_bit(ZONE_CONGESTED, &zone->flags);
                clear_bit(ZONE_DIRTY, &zone->flags);
        }
@@ -3148,7 +3142,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at high_wmark_pages(zone).
  *
- * Returns the final order kswapd was reclaiming at
+ * Returns the highest zone idx kswapd was reclaiming at
  *
  * There is special handling here for zones which are full of pinned pages.
  * This can happen if the pages are all mlocked, or if they are all used by
@@ -3165,8 +3159,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
  * interoperates with the page allocator fallback scheme to ensure that aging
  * of pages is balanced across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
-                                                       int *classzone_idx)
+static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 {
        int i;
        int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
@@ -3183,9 +3176,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
        count_vm_event(PAGEOUTRUN);
 
        do {
-               unsigned long nr_attempted = 0;
                bool raise_priority = true;
-               bool pgdat_needs_compaction = (order > 0);
 
                sc.nr_reclaimed = 0;
 
@@ -3220,7 +3211,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                break;
                        }
 
-                       if (!zone_balanced(zone, order, 0, 0)) {
+                       if (!zone_balanced(zone, order, false, 0, 0)) {
                                end_zone = i;
                                break;
                        } else {
@@ -3236,24 +3227,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                if (i < 0)
                        goto out;
 
-               for (i = 0; i <= end_zone; i++) {
-                       struct zone *zone = pgdat->node_zones + i;
-
-                       if (!populated_zone(zone))
-                               continue;
-
-                       /*
-                        * If any zone is currently balanced then kswapd will
-                        * not call compaction as it is expected that the
-                        * necessary pages are already available.
-                        */
-                       if (pgdat_needs_compaction &&
-                                       zone_watermark_ok(zone, order,
-                                               low_wmark_pages(zone),
-                                               *classzone_idx, 0))
-                               pgdat_needs_compaction = false;
-               }
-
                /*
                 * If we're getting trouble reclaiming, start doing writepage
                 * even in laptop mode.
@@ -3297,8 +3270,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                         * that that high watermark would be met at 100%
                         * efficiency.
                         */
-                       if (kswapd_shrink_zone(zone, end_zone,
-                                              &sc, &nr_attempted))
+                       if (kswapd_shrink_zone(zone, end_zone, &sc))
                                raise_priority = false;
                }
 
@@ -3311,28 +3283,10 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                pfmemalloc_watermark_ok(pgdat))
                        wake_up_all(&pgdat->pfmemalloc_wait);
 
-               /*
-                * Fragmentation may mean that the system cannot be rebalanced
-                * for high-order allocations in all zones. If twice the
-                * allocation size has been reclaimed and the zones are still
-                * not balanced then recheck the watermarks at order-0 to
-                * prevent kswapd reclaiming excessively. Assume that a
-                * process requested a high-order can direct reclaim/compact.
-                */
-               if (order && sc.nr_reclaimed >= 2UL << order)
-                       order = sc.order = 0;
-
                /* Check if kswapd should be suspending */
                if (try_to_freeze() || kthread_should_stop())
                        break;
 
-               /*
-                * Compact if necessary and kswapd is reclaiming at least the
-                * high watermark number of pages as requsted
-                */
-               if (pgdat_needs_compaction && sc.nr_reclaimed > nr_attempted)
-                       compact_pgdat(pgdat, order);
-
                /*
                 * Raise priority if scanning rate is too low or there was no
                 * progress in reclaiming pages
@@ -3340,20 +3294,18 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                if (raise_priority || !sc.nr_reclaimed)
                        sc.priority--;
        } while (sc.priority >= 1 &&
-                !pgdat_balanced(pgdat, order, *classzone_idx));
+                       !pgdat_balanced(pgdat, order, classzone_idx));
 
 out:
        /*
-        * Return the order we were reclaiming at so prepare_kswapd_sleep()
-        * makes a decision on the order we were last reclaiming at. However,
-        * if another caller entered the allocator slow path while kswapd
-        * was awake, order will remain at the higher level
+        * Return the highest zone idx we were reclaiming at so
+        * prepare_kswapd_sleep() makes the same decisions as here.
         */
-       *classzone_idx = end_zone;
-       return order;
+       return end_zone;
 }
 
-static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
+static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
+                               int classzone_idx, int balanced_classzone_idx)
 {
        long remaining = 0;
        DEFINE_WAIT(wait);
@@ -3364,7 +3316,22 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
        prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 
        /* Try to sleep for a short interval */
-       if (prepare_kswapd_sleep(pgdat, order, remaining, classzone_idx)) {
+       if (prepare_kswapd_sleep(pgdat, order, remaining,
+                                               balanced_classzone_idx)) {
+               /*
+                * Compaction records what page blocks it recently failed to
+                * isolate pages from and skips them in the future scanning.
+                * When kswapd is going to sleep, it is reasonable to assume
+                * that pages and compaction may succeed so reset the cache.
+                */
+               reset_isolation_suitable(pgdat);
+
+               /*
+                * We have freed the memory, now we should compact it to make
+                * allocation of the requested order possible.
+                */
+               wakeup_kcompactd(pgdat, order, classzone_idx);
+
                remaining = schedule_timeout(HZ/10);
                finish_wait(&pgdat->kswapd_wait, &wait);
                prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
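
[Annotation] The ordering in the hunk above is deliberate: the compaction isolation cache is reset and kcompactd is woken before the short HZ/10 nap, so the freshly reclaimed memory can be compacted while kswapd dozes. wakeup_kcompactd() comes from the kcompactd series this change belongs to; the snippet below is only a generic userspace sketch of a "record the most demanding request, then wake the worker" handoff, with made-up names, and is not the kernel implementation:

	#include <pthread.h>

	struct compact_request {
		pthread_mutex_t lock;
		pthread_cond_t wake;
		int max_order;		/* highest order requested so far */
		int classzone_idx;
	};

	static void request_compaction(struct compact_request *req,
				       int order, int classzone_idx)
	{
		pthread_mutex_lock(&req->lock);
		if (order > req->max_order)
			req->max_order = order;
		req->classzone_idx = classzone_idx;
		/* wake the kcompactd-like worker waiting on this condition */
		pthread_cond_signal(&req->wake);
		pthread_mutex_unlock(&req->lock);
	}
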
@@ -3374,7 +3341,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
         * After a short sleep, check if it was a premature sleep. If not, then
         * go fully to sleep until explicitly woken up.
         */
-       if (prepare_kswapd_sleep(pgdat, order, remaining, classzone_idx)) {
+       if (prepare_kswapd_sleep(pgdat, order, remaining,
+                                               balanced_classzone_idx)) {
                trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
 
                /*
@@ -3387,14 +3355,6 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
                 */
                set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
 
-               /*
-                * Compaction records what page blocks it recently failed to
-                * isolate pages from and skips them in the future scanning.
-                * When kswapd is going to sleep, it is reasonable to assume
-                * that pages and compaction may succeed so reset the cache.
-                */
-               reset_isolation_suitable(pgdat);
-
                if (!kthread_should_stop())
                        schedule();
 
@@ -3424,7 +3384,6 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 static int kswapd(void *p)
 {
        unsigned long order, new_order;
-       unsigned balanced_order;
        int classzone_idx, new_classzone_idx;
        int balanced_classzone_idx;
        pg_data_t *pgdat = (pg_data_t*)p;
@@ -3457,24 +3416,19 @@ static int kswapd(void *p)
        set_freezable();
 
        order = new_order = 0;
-       balanced_order = 0;
        classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
        balanced_classzone_idx = classzone_idx;
        for ( ; ; ) {
                bool ret;
 
                /*
-                * If the last balance_pgdat was unsuccessful it's unlikely a
-                * new request of a similar or harder type will succeed soon
-                * so consider going to sleep on the basis we reclaimed at
+                * While we were reclaiming, there might have been another
+                * wakeup, so check the values.
                 */
-               if (balanced_classzone_idx >= new_classzone_idx &&
-                                       balanced_order == new_order) {
-                       new_order = pgdat->kswapd_max_order;
-                       new_classzone_idx = pgdat->classzone_idx;
-                       pgdat->kswapd_max_order =  0;
-                       pgdat->classzone_idx = pgdat->nr_zones - 1;
-               }
+               new_order = pgdat->kswapd_max_order;
+               new_classzone_idx = pgdat->classzone_idx;
+               pgdat->kswapd_max_order =  0;
+               pgdat->classzone_idx = pgdat->nr_zones - 1;
 
                if (order < new_order || classzone_idx > new_classzone_idx) {
                        /*
@@ -3484,7 +3438,7 @@ static int kswapd(void *p)
                        order = new_order;
                        classzone_idx = new_classzone_idx;
                } else {
-                       kswapd_try_to_sleep(pgdat, balanced_order,
+                       kswapd_try_to_sleep(pgdat, order, classzone_idx,
                                                balanced_classzone_idx);
                        order = pgdat->kswapd_max_order;
                        classzone_idx = pgdat->classzone_idx;
@@ -3504,9 +3458,8 @@ static int kswapd(void *p)
                 */
                if (!ret) {
                        trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-                       balanced_classzone_idx = classzone_idx;
-                       balanced_order = balance_pgdat(pgdat, order,
-                                               &balanced_classzone_idx);
+                       balanced_classzone_idx = balance_pgdat(pgdat, order,
+                                                               classzone_idx);
                }
        }
 
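[Annotation] Taken together with the kswapd_try_to_sleep() change earlier, balance_pgdat() now reports where it balanced (a zone index) rather than how hard it tried (an order), and the sleep decision is made against that same zone range. Condensed from the hunks above, not a verbatim quote:

		/* reclaim, remembering the highest zone index actually balanced */
		balanced_classzone_idx = balance_pgdat(pgdat, order, classzone_idx);

		/* later, decide whether to nap or sleep against that same range */
		kswapd_try_to_sleep(pgdat, order, classzone_idx, balanced_classzone_idx);
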
@@ -3536,7 +3489,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
        }
        if (!waitqueue_active(&pgdat->kswapd_wait))
                return;
-       if (zone_balanced(zone, order, 0, 0))
+       if (zone_balanced(zone, order, true, 0, 0))
                return;
 
        trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);