X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=721d62c5be69977bc595f9cd8d8d9e5ce618ea26;hb=refs%2Fheads%2Fsched_update_prio;hp=a2214c64ed3cd04dceaed7a579f593852e458df1;hpb=00aba0533e1a70cd953d5ffe2425dc47b19d28e8;p=deliverable%2Flinux.git

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a2214c64ed3c..721d62c5be69 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -64,6 +64,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -91,6 +92,11 @@ EXPORT_PER_CPU_SYMBOL(_numa_mem_);
 int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
+#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
+volatile u64 latent_entropy __latent_entropy;
+EXPORT_SYMBOL(latent_entropy);
+#endif
+
 /*
  * Array of node states.
  */
@@ -254,7 +260,7 @@ int watermark_scale_factor = 10;
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
-static unsigned long __meminitdata dma_reserve;
+static unsigned long __meminitdata nr_memory_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
@@ -607,6 +613,9 @@ static bool need_debug_guardpage(void)
         if (!debug_pagealloc_enabled())
                 return false;
 
+        if (!debug_guardpage_minorder())
+                return false;
+
         return true;
 }
@@ -615,6 +624,9 @@ static void init_debug_guardpage(void)
         if (!debug_pagealloc_enabled())
                 return;
 
+        if (!debug_guardpage_minorder())
+                return;
+
         _debug_guardpage_enabled = true;
 }
@@ -635,19 +647,22 @@ static int __init debug_guardpage_minorder_setup(char *buf)
         pr_info("Setting debug_guardpage_minorder to %lu\n", res);
         return 0;
 }
-__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
 
-static inline void set_page_guard(struct zone *zone, struct page *page,
+static inline bool set_page_guard(struct zone *zone, struct page *page,
                                 unsigned int order, int migratetype)
 {
         struct page_ext *page_ext;
 
         if (!debug_guardpage_enabled())
-                return;
+                return false;
+
+        if (order >= debug_guardpage_minorder())
+                return false;
 
         page_ext = lookup_page_ext(page);
         if (unlikely(!page_ext))
-                return;
+                return false;
 
         __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
@@ -655,6 +670,8 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
         set_page_private(page, order);
         /* Guard pages are not available for any usage */
         __mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+        return true;
 }
 
 static inline void clear_page_guard(struct zone *zone, struct page *page,
@@ -676,9 +693,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
                 __mod_zone_freepage_state(zone, (1 << order), migratetype);
 }
 #else
-struct page_ext_operations debug_guardpage_ops = { NULL, };
-static inline void set_page_guard(struct zone *zone, struct page *page,
-                        unsigned int order, int migratetype) {}
+struct page_ext_operations debug_guardpage_ops;
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+                        unsigned int order, int migratetype) { return false; }
 static inline void clear_page_guard(struct zone *zone, struct page *page,
                                 unsigned int order, int migratetype) {}
 #endif
@@ -1234,6 +1251,15 @@ static void __free_pages_ok(struct page *page, unsigned int order)
         local_irq_restore(flags);
 }
 
+bool __meminitdata ram_latent_entropy;
+
+static int __init setup_ram_latent_entropy(char *str)
+{
+        ram_latent_entropy = true;
+        return 0;
+}
+early_param("ram_latent_entropy", setup_ram_latent_entropy);
+
 static void __init __free_pages_boot_core(struct page *page, unsigned int order)
 {
         unsigned int nr_pages = 1 << order;
@@ -1249,6 +1275,17 @@ static void __init __free_pages_boot_core(struct page *page, unsigned int order)
         __ClearPageReserved(p);
         set_page_count(p, 0);
 
+        if (ram_latent_entropy && !PageHighMem(page) &&
+                        page_to_pfn(page) < 0x100000) {
+                u64 hash = 0;
+                size_t index, end = PAGE_SIZE * nr_pages / sizeof(hash);
+                const u64 *data = lowmem_page_address(page);
+
+                for (index = 0; index < end; index++)
+                        hash ^= hash + data[index];
+                add_device_randomness((const void *)&hash, sizeof(hash));
+        }
+
         page_zone(page)->managed_pages += nr_pages;
         set_page_refcounted(page);
         __free_pages(page, order);
@@ -1393,15 +1430,18 @@ static void __init deferred_free_range(struct page *page,
                 return;
 
         /* Free a large naturally-aligned chunk if possible */
-        if (nr_pages == MAX_ORDER_NR_PAGES &&
-            (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+        if (nr_pages == pageblock_nr_pages &&
+            (pfn & (pageblock_nr_pages - 1)) == 0) {
                 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-                __free_pages_boot_core(page, MAX_ORDER-1);
+                __free_pages_boot_core(page, pageblock_order);
                 return;
         }
 
-        for (i = 0; i < nr_pages; i++, page++)
+        for (i = 0; i < nr_pages; i++, page++, pfn++) {
+                if ((pfn & (pageblock_nr_pages - 1)) == 0)
+                        set_pageblock_migratetype(page, MIGRATE_MOVABLE);
                 __free_pages_boot_core(page, 0);
+        }
 }
 
 /* Completion tracking for deferred_init_memmap() threads */
@@ -1469,9 +1509,9 @@ static int __init deferred_init_memmap(void *data)
                         /*
                          * Ensure pfn_valid is checked every
-                         * MAX_ORDER_NR_PAGES for memory holes
+                         * pageblock_nr_pages for memory holes
                          */
-                        if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+                        if ((pfn & (pageblock_nr_pages - 1)) == 0) {
                                 if (!pfn_valid(pfn)) {
                                         page = NULL;
                                         goto free_range;
@@ -1484,7 +1524,7 @@ static int __init deferred_init_memmap(void *data)
                         }
 
                         /* Minimise pfn page lookups and scheduler checks */
-                        if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+                        if (page && (pfn & (pageblock_nr_pages - 1)) != 0) {
                                 page++;
                         } else {
                                 nr_pages += nr_to_free;
@@ -1520,6 +1560,9 @@ free_range:
                                 free_base_page = NULL;
                                 free_base_pfn = nr_to_free = 0;
                         }
+                /* Free the last block of pages to allocator */
+                nr_pages += nr_to_free;
+                deferred_free_range(free_base_page, free_base_pfn, nr_to_free);
 
                 first_init_pfn = max(end_pfn, first_init_pfn);
         }
@@ -1616,18 +1659,15 @@ static inline void expand(struct zone *zone, struct page *page,
                 size >>= 1;
                 VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
 
-                if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
-                        debug_guardpage_enabled() &&
-                        high < debug_guardpage_minorder()) {
-                        /*
-                         * Mark as guard pages (or page), that will allow to
-                         * merge back to allocator when buddy will be freed.
-                         * Corresponding page table entries will not be touched,
-                         * pages will stay not present in virtual address space
-                         */
-                        set_page_guard(zone, &page[size], high, migratetype);
+                /*
+                 * Mark as guard pages (or page), that will allow to
+                 * merge back to allocator when buddy will be freed.
+                 * Corresponding page table entries will not be touched,
+                 * pages will stay not present in virtual address space
+                 */
+                if (set_page_guard(zone, &page[size], high, migratetype))
                         continue;
-                }
+
                 list_add(&page[size].lru, &area->free_list[migratetype]);
                 area->nr_free++;
                 set_page_order(&page[size], high);
@@ -2489,9 +2529,14 @@ int __isolate_free_page(struct page *page, unsigned int order)
         mt = get_pageblock_migratetype(page);
 
         if (!is_migrate_isolate(mt)) {
-                /* Obey watermarks as if the page was being allocated */
-                watermark = low_wmark_pages(zone) + (1 << order);
-                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+                /*
+                 * Obey watermarks as if the page was being allocated. We can
+                 * emulate a high-order watermark check with a raised order-0
+                 * watermark, because we already know our high-order page
+                 * exists.
+                 */
+                watermark = min_wmark_pages(zone) + (1UL << order);
+                if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
                         return 0;
 
                 __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3137,6 +3182,61 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         return NULL;
 }
 
+static inline bool
+should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+                     enum compact_result compact_result,
+                     enum compact_priority *compact_priority,
+                     int compaction_retries)
+{
+        int max_retries = MAX_COMPACT_RETRIES;
+        int min_priority;
+
+        if (!order)
+                return false;
+
+        /*
+         * compaction considers all the zone as desperately out of memory
+         * so it doesn't really make much sense to retry except when the
+         * failure could be caused by insufficient priority
+         */
+        if (compaction_failed(compact_result))
+                goto check_priority;
+
+        /*
+         * make sure the compaction wasn't deferred or didn't bail out early
+         * due to locks contention before we declare that we should give up.
+         * But do not retry if the given zonelist is not suitable for
+         * compaction.
+         */
+        if (compaction_withdrawn(compact_result))
+                return compaction_zonelist_suitable(ac, order, alloc_flags);
+
+        /*
+         * !costly requests are much more important than __GFP_REPEAT
+         * costly ones because they are de facto nofail and invoke OOM
+         * killer to move on while costly can fail and users are ready
+         * to cope with that. 1/4 retries is rather arbitrary but we
+         * would need much more detailed feedback from compaction to
+         * make a better decision.
+         */
+        if (order > PAGE_ALLOC_COSTLY_ORDER)
+                max_retries /= 4;
+        if (compaction_retries <= max_retries)
+                return true;
+
+        /*
+         * Make sure there is at least one attempt at the highest priority
+         * if we exhausted all retries at the lower priorities
+         */
+check_priority:
+        min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+                        MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+        if (*compact_priority > min_priority) {
+                (*compact_priority)--;
+                return true;
+        }
+        return false;
+}
 #else
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3147,8 +3247,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         return NULL;
 }
 
-#endif /* CONFIG_COMPACTION */
-
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
                      enum compact_result compact_result,
@@ -3175,6 +3273,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
         }
         return false;
 }
+#endif /* CONFIG_COMPACTION */
 
 /* Perform direct synchronous page reclaim */
 static int
@@ -4555,7 +4654,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
         int j;
         struct zonelist *zonelist;
 
-        zonelist = &pgdat->node_zonelists[0];
+        zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
         for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                 ;
         j = build_zonelists_node(NODE_DATA(node), zonelist, j);
@@ -4571,7 +4670,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
         int j;
         struct zonelist *zonelist;
 
-        zonelist = &pgdat->node_zonelists[1];
+        zonelist = &pgdat->node_zonelists[ZONELIST_NOFALLBACK];
         j = build_zonelists_node(pgdat, zonelist, 0);
         zonelist->_zonerefs[j].zone = NULL;
         zonelist->_zonerefs[j].zone_idx = 0;
@@ -4592,7 +4691,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
         struct zone *z;
         struct zonelist *zonelist;
 
-        zonelist = &pgdat->node_zonelists[0];
+        zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
         pos = 0;
         for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
                 for (j = 0; j < nr_nodes; j++) {
@@ -4727,7 +4826,7 @@ static void build_zonelists(pg_data_t *pgdat)
 
         local_node = pgdat->node_id;
 
-        zonelist = &pgdat->node_zonelists[0];
+        zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
         j = build_zonelists_node(pgdat, zonelist, 0);
 
         /*
@@ -4999,15 +5098,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                         break;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-                /*
-                 * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
-                 * from zone_movable_pfn[nid] to end of each node should be
-                 * ZONE_MOVABLE not ZONE_NORMAL. skip it.
-                 */
-                if (!mirrored_kernelcore && zone_movable_pfn[nid])
-                        if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
-                                continue;
-
                 /*
                  * Check given memblock attribute by firmware which can affect
                  * kernel memory layout. If zone==ZONE_MOVABLE but memory is
@@ -5451,6 +5541,12 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
                         *zone_end_pfn = min(node_end_pfn,
                                 arch_zone_highest_possible_pfn[movable_zone]);
 
+                /* Adjust for ZONE_MOVABLE starting within this range */
+                } else if (!mirrored_kernelcore &&
+                        *zone_start_pfn < zone_movable_pfn[nid] &&
+                        *zone_end_pfn > zone_movable_pfn[nid]) {
+                        *zone_end_pfn = zone_movable_pfn[nid];
+
                 /* Check if this whole range is within ZONE_MOVABLE */
                 } else if (*zone_start_pfn >= zone_movable_pfn[nid])
                         *zone_start_pfn = *zone_end_pfn;
@@ -5554,28 +5650,23 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
          * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
          * and vice versa.
          */
-        if (zone_movable_pfn[nid]) {
-                if (mirrored_kernelcore) {
-                        unsigned long start_pfn, end_pfn;
-                        struct memblock_region *r;
-
-                        for_each_memblock(memory, r) {
-                                start_pfn = clamp(memblock_region_memory_base_pfn(r),
-                                                  zone_start_pfn, zone_end_pfn);
-                                end_pfn = clamp(memblock_region_memory_end_pfn(r),
-                                                zone_start_pfn, zone_end_pfn);
-
-                                if (zone_type == ZONE_MOVABLE &&
-                                    memblock_is_mirror(r))
-                                        nr_absent += end_pfn - start_pfn;
-
-                                if (zone_type == ZONE_NORMAL &&
-                                    !memblock_is_mirror(r))
-                                        nr_absent += end_pfn - start_pfn;
-                        }
-                } else {
-                        if (zone_type == ZONE_NORMAL)
-                                nr_absent += node_end_pfn - zone_movable_pfn[nid];
+        if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+                unsigned long start_pfn, end_pfn;
+                struct memblock_region *r;
+
+                for_each_memblock(memory, r) {
+                        start_pfn = clamp(memblock_region_memory_base_pfn(r),
+                                          zone_start_pfn, zone_end_pfn);
+                        end_pfn = clamp(memblock_region_memory_end_pfn(r),
+                                        zone_start_pfn, zone_end_pfn);
+
+                        if (zone_type == ZONE_MOVABLE &&
+                            memblock_is_mirror(r))
+                                nr_absent += end_pfn - start_pfn;
+
+                        if (zone_type == ZONE_NORMAL &&
+                            !memblock_is_mirror(r))
+                                nr_absent += end_pfn - start_pfn;
                 }
         }
@@ -5812,10 +5903,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 }
 
                 /* Account for reserved pages */
-                if (j == 0 && freesize > dma_reserve) {
-                        freesize -= dma_reserve;
+                if (j == 0 && freesize > nr_memory_reserve) {
+                        freesize -= nr_memory_reserve;
                         printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
-                                        zone_names[0], dma_reserve);
+                                        zone_names[0], nr_memory_reserve);
                 }
 
                 if (!is_highmem_idx(j))
@@ -6501,8 +6592,9 @@ void __init mem_init_print_info(const char *str)
 }
 
 /**
- * set_dma_reserve - set the specified number of pages reserved in the first zone
- * @new_dma_reserve: The number of pages to mark reserved
+ * set_memory_reserve - set number of pages reserved in the first zone
+ * @nr_reserve: The number of pages to mark reserved
+ * @inc: true increment to existing value; false set new value.
  *
  * The per-cpu batchsize and zone watermarks are determined by managed_pages.
  * In the DMA zone, a significant percentage may be consumed by kernel image
@@ -6511,9 +6603,12 @@ void __init mem_init_print_info(const char *str)
  * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and
  * smaller per-cpu batchsize.
  */
-void __init set_dma_reserve(unsigned long new_dma_reserve)
+void __init set_memory_reserve(unsigned long nr_reserve, bool inc)
 {
-        dma_reserve = new_dma_reserve;
+        if (inc)
+                nr_memory_reserve += nr_reserve;
+        else
+                nr_memory_reserve = nr_reserve;
 }
 
 void __init free_area_init(unsigned long *zones_size)
@@ -6929,6 +7024,17 @@ static int __init set_hashdist(char *str)
 __setup("hashdist=", set_hashdist);
 #endif
 
+#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+/*
+ * Returns the number of pages that arch has reserved but
+ * is not known to alloc_large_system_hash().
+ */
+static unsigned long __init arch_reserved_kernel_pages(void)
+{
+        return 0;
+}
+#endif
+
 /*
  * allocate a large system hash table from bootmem
  * - it is assumed that the hash table must contain an exact power-of-2
@@ -6953,6 +7059,7 @@ void *__init alloc_large_system_hash(const char *tablename,
         if (!numentries) {
                 /* round applicable memory size up to nearest megabyte */
                 numentries = nr_kernel_pages;
+                numentries -= arch_reserved_kernel_pages();
 
                 /* It isn't necessary when PAGE_SIZE >= 1MB */
                 if (PAGE_SHIFT < 20)
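
For readers who want to see the "ram_latent_entropy" mixing step from __free_pages_boot_core() in isolation, here is a minimal userspace sketch. It is not part of the patch: the fold_region() helper, the buffer, and its contents are made up for illustration. It only reproduces the per-word folding step (hash ^= hash + data[index]); in the kernel the resulting hash is then fed to add_device_randomness().

```c
/*
 * Illustrative sketch only -- not part of the patch above.
 * Folds every 64-bit word of a page-sized buffer into a running hash,
 * using the same mixing step as __free_pages_boot_core().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t fold_region(const uint64_t *data, size_t nwords)
{
	uint64_t hash = 0;
	size_t i;

	/* Same step as the patch: hash ^= hash + data[i]; */
	for (i = 0; i < nwords; i++)
		hash ^= hash + data[i];
	return hash;
}

int main(void)
{
	uint64_t page[512];			/* stand-in for one 4 KiB page */

	memset(page, 0xa5, sizeof(page));
	page[7] = 0xdeadbeefcafef00dULL;	/* pretend leftover RAM contents */

	printf("folded hash: %016llx\n",
	       (unsigned long long)fold_region(page, 512));
	return 0;
}
```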