unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
long nr_swap_pages;
+int percpu_pagelist_fraction;
static void fastcall free_hot_cold_page(struct page *page, int cold);
return;
list_add(&page->lru, &list);
- mod_page_state(pgfree, 1 << order);
kernel_map_pages(page, 1<<order, 0);
local_irq_save(flags);
+ __mod_page_state(pgfree, 1 << order);
free_pages_bulk(page_zone(page), 1, &list, order);
local_irq_restore(flags);
}
}
#endif /* CONFIG_PM */
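The reshuffle above is the core trick of this hunk: mod_page_state() wraps its per-CPU counter bump in its own local_irq_save()/restore() pair, which is wasted work when the caller is about to disable interrupts anyway. Switching to __mod_page_state() inside the existing window drops one interrupt flip per free. In outline (a condensed before/after sketch, not an additional hunk):

	/* before: two irq save/restore pairs per call */
	mod_page_state(pgfree, 1 << order);	/* save, bump, restore */
	local_irq_save(flags);
	free_pages_bulk(page_zone(page), 1, &list, order);
	local_irq_restore(flags);

	/* after: the bump rides inside the window we already pay for */
	local_irq_save(flags);
	__mod_page_state(pgfree, 1 << order);	/* plain increment, irqs off */
	free_pages_bulk(page_zone(page), 1, &list, order);
	local_irq_restore(flags);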
-static void zone_statistics(struct zonelist *zonelist, struct zone *z)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
{
#ifdef CONFIG_NUMA
- unsigned long flags;
- int cpu;
pg_data_t *pg = z->zone_pgdat;
pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
struct per_cpu_pageset *p;
- local_irq_save(flags);
- cpu = smp_processor_id();
- p = zone_pcp(z,cpu);
+ p = zone_pcp(z, cpu);
	if (pg == orig) {
		p->numa_hit++;
	} else {
		p->numa_miss++;
		zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
	}
	if (pg == NODE_DATA(numa_node_id()))
		p->local_node++;
	else
		p->other_node++;
- local_irq_restore(flags);
#endif
}
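For orientation, the NUMA counters touched here have these meanings (summarized from the 2.6 per-cpu pageset statistics, not text from this patch):

	/*
	 * numa_hit:     allocation was satisfied by the preferred (first) zone
	 * numa_miss:    this zone served a request that preferred another node
	 * numa_foreign: bumped on the preferred zone whose request was pushed
	 *               off to another node
	 * local_node:   the page sits on the node the allocating CPU runs on
	 * other_node:   the page sits on a remote node
	 */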
if (free_pages_check(page))
return;
- inc_page_state(pgfree);
kernel_map_pages(page, 1, 0);
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
+ __inc_page_state(pgfree);
list_add(&page->lru, &pcp->list);
pcp->count++;
if (pcp->count >= pcp->high)
* we cheat by calling it from here, in the order > 0 path. Saves a branch
* or two.
*/
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static struct page *buffered_rmqueue(struct zonelist *zonelist,
+ struct zone *zone, int order, gfp_t gfp_flags)
{
unsigned long flags;
struct page *page;
int cold = !!(gfp_flags & __GFP_COLD);
+ int cpu;
again:
+ cpu = get_cpu();
if (order == 0) {
struct per_cpu_pages *pcp;
- page = NULL;
- pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+ pcp = &zone_pcp(zone, cpu)->pcp[cold];
local_irq_save(flags);
- if (!pcp->count)
+ if (!pcp->count) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, &pcp->list);
- if (likely(pcp->count)) {
- page = list_entry(pcp->list.next, struct page, lru);
- list_del(&page->lru);
- pcp->count--;
+ if (unlikely(!pcp->count))
+ goto failed;
}
- local_irq_restore(flags);
- put_cpu();
+ page = list_entry(pcp->list.next, struct page, lru);
+ list_del(&page->lru);
+ pcp->count--;
} else {
spin_lock_irqsave(&zone->lock, flags);
page = __rmqueue(zone, order);
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
+ if (!page)
+ goto failed;
}
- if (page != NULL) {
- BUG_ON(bad_range(zone, page));
- mod_page_state_zone(zone, pgalloc, 1 << order);
- if (prep_new_page(page, order))
- goto again;
+ __mod_page_state_zone(zone, pgalloc, 1 << order);
+ zone_statistics(zonelist, zone, cpu);
+ local_irq_restore(flags);
+ put_cpu();
- if (gfp_flags & __GFP_ZERO)
- prep_zero_page(page, order, gfp_flags);
+ BUG_ON(bad_range(zone, page));
+ if (prep_new_page(page, order))
+ goto again;
- if (order && (gfp_flags & __GFP_COMP))
- prep_compound_page(page, order);
- }
+ if (gfp_flags & __GFP_ZERO)
+ prep_zero_page(page, order, gfp_flags);
+
+ if (order && (gfp_flags & __GFP_COMP))
+ prep_compound_page(page, order);
return page;
+
+failed:
+ local_irq_restore(flags);
+ put_cpu();
+ return NULL;
}
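The order-0 path above is a per-CPU magazine: pop from the local hot or cold list, refill in batches from the buddy lists only when it runs dry, and share one failure exit with the order > 0 path. A toy userspace model of just the refill logic, with hypothetical names (not kernel code):

	/* Toy model: a per-CPU cache refilled in batches from a shared pool. */
	#include <stdio.h>

	#define BATCH	16

	static int buddy_pages = 40;	/* pretend free pages in the buddy lists */
	static int pcp_count;		/* pages cached on the "per-CPU" list */

	static int rmqueue_bulk_model(int batch)  /* stand-in for rmqueue_bulk() */
	{
		int got = buddy_pages < batch ? buddy_pages : batch;

		buddy_pages -= got;
		return got;
	}

	static int alloc_model(void)
	{
		if (!pcp_count) {
			pcp_count += rmqueue_bulk_model(BATCH);
			if (!pcp_count)
				return -1;	/* the 'goto failed' case */
		}
		pcp_count--;			/* list_del() of the head page */
		return 0;
	}

	int main(void)
	{
		int i, served = 0;

		for (i = 0; i < 64; i++)
			served += alloc_model() == 0;
		printf("%d of 64 allocations served\n", served);	/* 40 */
		return 0;
	}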
#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
continue;
}
- page = buffered_rmqueue(*z, order, gfp_mask);
+ page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
if (page) {
- zone_statistics(zonelist, *z);
break;
}
} while (*(++z) != NULL);
int cpu = 0;
memset(ret, 0, sizeof(*ret));
+ cpus_and(*cpumask, *cpumask, cpu_online_map);
cpu = first_cpu(*cpumask);
while (cpu < NR_CPUS) {
__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
}
-unsigned long __read_page_state(unsigned long offset)
+unsigned long read_page_state_offset(unsigned long offset)
{
unsigned long ret = 0;
int cpu;
- for_each_cpu(cpu) {
+ for_each_online_cpu(cpu) {
unsigned long in;
		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
		ret += *((unsigned long *)in);
	}
	return ret;
}
-void __mod_page_state(unsigned long offset, unsigned long delta)
+void __mod_page_state_offset(unsigned long offset, unsigned long delta)
+{
+ void *ptr;
+
+ ptr = &__get_cpu_var(page_states);
+ *(unsigned long *)(ptr + offset) += delta;
+}
+EXPORT_SYMBOL(__mod_page_state_offset);
+
+void mod_page_state_offset(unsigned long offset, unsigned long delta)
{
unsigned long flags;
- void* ptr;
+ void *ptr;
local_irq_save(flags);
ptr = &__get_cpu_var(page_states);
- *(unsigned long*)(ptr + offset) += delta;
+ *(unsigned long *)(ptr + offset) += delta;
local_irq_restore(flags);
}
-
-EXPORT_SYMBOL(__mod_page_state);
+EXPORT_SYMBOL(mod_page_state_offset);
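Callers keep writing mod_page_state(pgfree, ...) by member name; the header side of this rename presumably supplies the offset with offsetof(), roughly like this (a sketch of the glue, not the verbatim header):

	#include <stddef.h>	/* offsetof */

	#define mod_page_state(member, delta)				\
		mod_page_state_offset(offsetof(struct page_state, member), (delta))

	#define __mod_page_state(member, delta)				\
		__mod_page_state_offset(offsetof(struct page_state, member), (delta))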
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
unsigned long *free, struct pglist_data *pgdat)
* Add all populated zones of a node to the zonelist.
*/
static int __init build_zonelists_node(pg_data_t *pgdat,
- struct zonelist *zonelist, int j, int k)
+ struct zonelist *zonelist, int nr_zones, int zone_type)
{
struct zone *zone;
- BUG_ON(k > ZONE_HIGHMEM);
- for (zone = pgdat->node_zones + k; zone >= pgdat->node_zones; zone--) {
+ BUG_ON(zone_type > ZONE_HIGHMEM);
+
+ do {
+ zone = pgdat->node_zones + zone_type;
if (populated_zone(zone)) {
#ifndef CONFIG_HIGHMEM
- BUG_ON(zone - pgdat->node_zones > ZONE_NORMAL);
+ BUG_ON(zone_type > ZONE_NORMAL);
#endif
- zonelist->zones[j++] = zone;
- check_highest_zone(k);
+ zonelist->zones[nr_zones++] = zone;
+ check_highest_zone(zone_type);
}
- }
- return j;
+ zone_type--;
+
+ } while (zone_type >= 0);
+ return nr_zones;
}
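Concretely, the new do/while builds the node's fallback order by walking from zone_type down toward ZONE_DMA and appending only populated zones. A small standalone model of that walk (illustrative, with three zone types assumed):

	#include <stdio.h>

	enum { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM };

	static const char *name[] = { "DMA", "Normal", "HighMem" };
	static int populated[] = { 1, 1, 0 };	/* e.g. a node without highmem */

	static int build_node(const char **zones, int nr_zones, int zone_type)
	{
		do {
			if (populated[zone_type])
				zones[nr_zones++] = name[zone_type];
			zone_type--;
		} while (zone_type >= 0);
		return nr_zones;	/* next free slot, as in the kernel */
	}

	int main(void)
	{
		const char *zl[3];
		int i, n = build_node(zl, 0, ZONE_HIGHMEM);

		for (i = 0; i < n; i++)
			printf("zones[%d] = %s\n", i, zl[i]);	/* Normal, DMA */
		return 0;
	}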
static inline int highest_zone(int zone_bits)
INIT_LIST_HEAD(&pcp->list);
}
+/*
+ * setup_pagelist_highmark() sets the high water mark of the hot
+ * per_cpu_pagelist in pageset p to the value high.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+ unsigned long high)
+{
+ struct per_cpu_pages *pcp;
+
+ pcp = &p->pcp[0]; /* hot list */
+ pcp->high = high;
+ pcp->batch = max(1UL, high/4);
+ if ((high/4) > (PAGE_SHIFT * 8))
+ pcp->batch = PAGE_SHIFT * 8;
+}
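The clamp means batch tracks high/4 only up to PAGE_SHIFT * 8, i.e. 96 pages on a 4K-page build (PAGE_SHIFT == 12). Two worked examples under that assumption:

	/*
	 * Large zone: present_pages = 1048576 (4GB of 4K pages), fraction = 8
	 *   high   = 1048576 / 8 = 131072 pages may sit on the hot list
	 *   high/4 = 32768 > 96, so batch is clamped to PAGE_SHIFT * 8 = 96
	 *
	 * Small zone: present_pages = 1024, same fraction
	 *   high   = 1024 / 8 = 128
	 *   batch  = max(1, 128 / 4) = 32, well under the clamp
	 */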
+
#ifdef CONFIG_NUMA
/*
* Boot pageset table. One per cpu which is going to be used for all
goto bad;
setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+
+ if (percpu_pagelist_fraction)
+ setup_pagelist_highmark(zone_pcp(zone, cpu),
+ (zone->present_pages / percpu_pagelist_fraction));
}
return 0;
static inline void free_zone_pagesets(int cpu)
{
-#ifdef CONFIG_NUMA
struct zone *zone;
	for_each_zone(zone) {
		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);

		zone_pcp(zone, cpu) = NULL;
kfree(pset);
}
-#endif
}
static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
return 0;
}
+/*
+ * percpu_pagelist_fraction - changes pcp->high for each zone on each cpu.
+ * It is the fraction of a zone's total pages that a hot per-cpu pagelist
+ * may hold before it is flushed back to the buddy allocator.
+ */
+
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+ struct zone *zone;
+ unsigned int cpu;
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ if (!write || (ret == -EINVAL))
+ return ret;
+ for_each_zone(zone) {
+ for_each_online_cpu(cpu) {
+ unsigned long high;
+ high = zone->present_pages / percpu_pagelist_fraction;
+ setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+ }
+ }
+ return 0;
+}
+
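Once registered in the vm sysctl table (not shown in this excerpt), the knob appears as /proc/sys/vm/percpu_pagelist_fraction; writing N lets each hot list grow to 1/N of its zone before draining. A minimal userspace sketch of poking it:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/percpu_pagelist_fraction", "w");

		if (!f) {
			perror("percpu_pagelist_fraction");
			return 1;
		}
		fprintf(f, "8\n");	/* pcp->high = present_pages / 8 per zone */
		return fclose(f) != 0;
	}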
__initdata int hashdist = HASHDIST_DEFAULT;
#ifdef CONFIG_NUMA