#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/times.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
#include <linux/kprobes.h>
#include <linux/delayacct.h>
#include <asm/tlb.h>
while (sd) {
cpumask_t span;
struct sched_group *group;
- int new_cpu;
- int weight;
+ int new_cpu, weight;
+
+ if (!(sd->flags & flag)) {
+ sd = sd->child;
+ continue;
+ }
span = sd->span;
group = find_idlest_group(sd, t, cpu);
- if (!group)
- goto nextlevel;
+ if (!group) {
+ sd = sd->child;
+ continue;
+ }
new_cpu = find_idlest_cpu(group, t, cpu);
- if (new_cpu == -1 || new_cpu == cpu)
- goto nextlevel;
+ if (new_cpu == -1 || new_cpu == cpu) {
+ /* Now try balancing at a lower domain level of cpu */
+ sd = sd->child;
+ continue;
+ }
- /* Now try balancing at a lower domain level */
+ /* Now try balancing at a lower domain level of new_cpu */
cpu = new_cpu;
-nextlevel:
sd = NULL;
weight = cpus_weight(span);
for_each_domain(cpu, tmp) {
__releases(rq->lock)
{
struct mm_struct *mm = rq->prev_mm;
- unsigned long prev_task_flags;
+ long prev_state;
rq->prev_mm = NULL;
/*
* A task struct has one reference for the use as "current".
- * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and
- * calls schedule one last time. The schedule call will never return,
- * and the scheduled task must drop that reference.
- * The test for EXIT_ZOMBIE must occur while the runqueue locks are
+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls
+ * schedule one last time. The schedule call will never return, and
+ * the scheduled task must drop that reference.
+ * The test for TASK_DEAD must occur while the runqueue locks are
* still held, otherwise prev could be scheduled on another cpu, die
* there before we look at prev->state, and then the reference would
* be dropped twice.
* Manfred Spraul <manfred@colorfullife.com>
*/
- prev_task_flags = prev->flags;
+ prev_state = prev->state;
finish_arch_switch(prev);
finish_lock_switch(rq, prev);
if (mm)
mmdrop(mm);
- if (unlikely(prev_task_flags & PF_DEAD)) {
+ if (unlikely(prev_state == TASK_DEAD)) {
/*
* Remove function-return probe instances associated with this
* task and put them back on the free list.
spin_lock_irq(&rq->lock);
- if (unlikely(prev->flags & PF_DEAD))
- prev->state = EXIT_DEAD;
-
switch_count = &prev->nivcsw;
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
switch_count = &prev->nvcsw;
(p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
(!p->mm && param->sched_priority > MAX_RT_PRIO-1))
return -EINVAL;
- if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
- != (param->sched_priority == 0))
+ if (is_rt_policy(policy) != (param->sched_priority != 0))
return -EINVAL;
/*
* Allow unprivileged RT tasks to decrease priority:
*/
if (!capable(CAP_SYS_NICE)) {
- unsigned long rlim_rtprio;
- unsigned long flags;
-
- if (!lock_task_sighand(p, &flags))
- return -ESRCH;
- rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
- unlock_task_sighand(p, &flags);
+ if (is_rt_policy(policy)) {
+ unsigned long rlim_rtprio;
+ unsigned long flags;
+
+ if (!lock_task_sighand(p, &flags))
+ return -ESRCH;
+ rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+ unlock_task_sighand(p, &flags);
+
+ /* can't set/change the rt policy */
+ if (policy != p->policy && !rlim_rtprio)
+ return -EPERM;
+
+ /* can't increase priority */
+ if (param->sched_priority > p->rt_priority &&
+ param->sched_priority > rlim_rtprio)
+ return -EPERM;
+ }
- /*
- * can't change policy, except between SCHED_NORMAL
- * and SCHED_BATCH:
- */
- if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
- (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
- !rlim_rtprio)
- return -EPERM;
- /* can't increase priority */
- if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
- param->sched_priority > p->rt_priority &&
- param->sched_priority > rlim_rtprio)
- return -EPERM;
/* can't change other user's priorities */
if ((current->euid != p->euid) &&
(current->euid != p->uid))
#ifndef CONFIG_SMP
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_online_map); /* exported here because this !CONFIG_SMP branch is what defines the map */
+
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_possible_map); /* likewise: the !CONFIG_SMP definition is exported next to it */
#endif
long sched_getaffinity(pid_t pid, cpumask_t *mask)
* NOTE: this function does not set the idle thread's NEED_RESCHED
* flag, to make booting more robust.
*/
-void __devinit init_idle(struct task_struct *idle, int cpu)
+void __cpuinit init_idle(struct task_struct *idle, int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
/* Cannot have done final schedule yet: would have vanished. */
- BUG_ON(p->flags & PF_DEAD);
+ BUG_ON(p->state == TASK_DEAD);
get_task_struct(p);
struct sched_domain *parent = tmp->parent;
if (!parent)
break;
- if (sd_parent_degenerate(tmp, parent))
+ if (sd_parent_degenerate(tmp, parent)) {
tmp->parent = parent->parent;
+ if (parent->parent)
+ parent->parent->child = tmp;
+ }
}
- if (sd && sd_degenerate(sd))
+ if (sd && sd_degenerate(sd)) {
sd = sd->parent;
+ if (sd)
+ sd->child = NULL;
+ }
sched_domain_debug(sd, cpu);
}
/* cpus with isolated domains */
-static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE;
/* Setup the mask of cpus configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
* covered by the given span, and will set each group's ->cpumask correctly,
* and ->cpu_power to 0.
*/
-static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
- int (*group_fn)(int cpu))
+static void
+init_sched_build_groups(struct sched_group groups[], cpumask_t span,
+ const cpumask_t *cpu_map,
+ int (*group_fn)(int cpu, const cpumask_t *cpu_map))
{
struct sched_group *first = NULL, *last = NULL;
cpumask_t covered = CPU_MASK_NONE;
int i;
for_each_cpu_mask(i, span) {
- int group = group_fn(i);
+ int group = group_fn(i, cpu_map);
struct sched_group *sg = &groups[group];
int j;
sg->cpu_power = 0;
for_each_cpu_mask(j, span) {
- if (group_fn(j) != group)
+ if (group_fn(j, cpu_map) != group)
continue;
cpu_set(j, covered);
#endif
);
if (system_state == SYSTEM_BOOTING) {
- printk("migration_cost=");
- for (distance = 0; distance <= max_distance; distance++) {
- if (distance)
- printk(",");
- printk("%ld", (long)migration_cost[distance] / 1000);
+ if (num_online_cpus() > 1) {
+ printk("migration_cost=");
+ for (distance = 0; distance <= max_distance; distance++) {
+ if (distance)
+ printk(",");
+ printk("%ld", (long)migration_cost[distance] / 1000);
+ }
+ printk("\n");
}
- printk("\n");
}
j1 = jiffies;
if (migration_debug)
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
-static int cpu_to_cpu_group(int cpu)
+static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) /* cpu_map is unused: the group index is the cpu number itself */
{
return cpu;
}
*/
#ifdef CONFIG_SCHED_MC
static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static struct sched_group *sched_group_core_bycpu[NR_CPUS];
+static struct sched_group sched_group_core[NR_CPUS];
#endif
#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
-static int cpu_to_core_group(int cpu)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) /* MC+SMT variant: returns the index used into sched_group_core[] */
{
- return first_cpu(cpu_sibling_map[cpu]);
+ cpumask_t mask = cpu_sibling_map[cpu]; /* local copy, so the global sibling map is not modified */
+ cpus_and(mask, mask, *cpu_map); /* keep only the siblings that are also in *cpu_map */
+ return first_cpu(mask); /* first remaining sibling names the group */
}
#elif defined(CONFIG_SCHED_MC)
-static int cpu_to_core_group(int cpu)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) /* MC without SMT: cpu_map is unused, each cpu is its own group */
{
return cpu;
}
#endif
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
-static int cpu_to_phys_group(int cpu)
+static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map)
{
#ifdef CONFIG_SCHED_MC
cpumask_t mask = cpu_coregroup_map(cpu);
+ cpus_and(mask, mask, *cpu_map);
return first_cpu(mask);
#elif defined(CONFIG_SCHED_SMT)
- return first_cpu(cpu_sibling_map[cpu]);
+ cpumask_t mask = cpu_sibling_map[cpu];
+ cpus_and(mask, mask, *cpu_map);
+ return first_cpu(mask);
#else
return cpu;
#endif
static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
-static int cpu_to_allnodes_group(int cpu)
+static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map) /* cpu_map is unused: the group index is the cpu's node id */
{
return cpu_to_node(cpu);
}
}
#endif
+#ifdef CONFIG_NUMA
/* Free memory allocated for various sched_group structures */
static void free_sched_groups(const cpumask_t *cpu_map)
{
- int cpu;
-#ifdef CONFIG_NUMA
- int i;
+ int cpu, i;
for_each_cpu_mask(cpu, *cpu_map) {
struct sched_group *sched_group_allnodes
kfree(sched_group_nodes);
sched_group_nodes_bycpu[cpu] = NULL;
}
-#endif
- for_each_cpu_mask(cpu, *cpu_map) {
- if (sched_group_phys_bycpu[cpu]) {
- kfree(sched_group_phys_bycpu[cpu]);
- sched_group_phys_bycpu[cpu] = NULL;
- }
-#ifdef CONFIG_SCHED_MC
- if (sched_group_core_bycpu[cpu]) {
- kfree(sched_group_core_bycpu[cpu]);
- sched_group_core_bycpu[cpu] = NULL;
- }
-#endif
- }
}
+#else /* !CONFIG_NUMA */
+static void free_sched_groups(const cpumask_t *cpu_map) /* empty stub for the !CONFIG_NUMA build */
+{
+}
+#endif /* CONFIG_NUMA */
/*
* Build sched domains for a given set of cpus and attach the sched domains
static int build_sched_domains(const cpumask_t *cpu_map)
{
int i;
- struct sched_group *sched_group_phys = NULL;
-#ifdef CONFIG_SCHED_MC
- struct sched_group *sched_group_core = NULL;
-#endif
#ifdef CONFIG_NUMA
struct sched_group **sched_group_nodes = NULL;
struct sched_group *sched_group_allnodes = NULL;
sd = &per_cpu(allnodes_domains, i);
*sd = SD_ALLNODES_INIT;
sd->span = *cpu_map;
- group = cpu_to_allnodes_group(i);
+ group = cpu_to_allnodes_group(i, cpu_map);
sd->groups = &sched_group_allnodes[group];
p = sd;
} else
*sd = SD_NODE_INIT;
sd->span = sched_domain_node_span(cpu_to_node(i));
sd->parent = p;
+ if (p)
+ p->child = sd;
cpus_and(sd->span, sd->span, *cpu_map);
#endif
- if (!sched_group_phys) {
- sched_group_phys
- = kmalloc(sizeof(struct sched_group) * NR_CPUS,
- GFP_KERNEL);
- if (!sched_group_phys) {
- printk (KERN_WARNING "Can not alloc phys sched"
- "group\n");
- goto error;
- }
- sched_group_phys_bycpu[i] = sched_group_phys;
- }
-
p = sd;
sd = &per_cpu(phys_domains, i);
- group = cpu_to_phys_group(i);
+ group = cpu_to_phys_group(i, cpu_map);
*sd = SD_CPU_INIT;
sd->span = nodemask;
sd->parent = p;
+ if (p)
+ p->child = sd;
sd->groups = &sched_group_phys[group];
#ifdef CONFIG_SCHED_MC
- if (!sched_group_core) {
- sched_group_core
- = kmalloc(sizeof(struct sched_group) * NR_CPUS,
- GFP_KERNEL);
- if (!sched_group_core) {
- printk (KERN_WARNING "Can not alloc core sched"
- "group\n");
- goto error;
- }
- sched_group_core_bycpu[i] = sched_group_core;
- }
-
p = sd;
sd = &per_cpu(core_domains, i);
- group = cpu_to_core_group(i);
+ group = cpu_to_core_group(i, cpu_map);
*sd = SD_MC_INIT;
sd->span = cpu_coregroup_map(i);
cpus_and(sd->span, sd->span, *cpu_map);
sd->parent = p;
+ p->child = sd;
sd->groups = &sched_group_core[group];
#endif
#ifdef CONFIG_SCHED_SMT
p = sd;
sd = &per_cpu(cpu_domains, i);
- group = cpu_to_cpu_group(i);
+ group = cpu_to_cpu_group(i, cpu_map);
*sd = SD_SIBLING_INIT;
sd->span = cpu_sibling_map[i];
cpus_and(sd->span, sd->span, *cpu_map);
sd->parent = p;
+ p->child = sd;
sd->groups = &sched_group_cpus[group];
#endif
}
continue;
init_sched_build_groups(sched_group_cpus, this_sibling_map,
- &cpu_to_cpu_group);
+ cpu_map, &cpu_to_cpu_group);
}
#endif
if (i != first_cpu(this_core_map))
continue;
init_sched_build_groups(sched_group_core, this_core_map,
- &cpu_to_core_group);
+ cpu_map, &cpu_to_core_group);
}
#endif
continue;
init_sched_build_groups(sched_group_phys, nodemask,
- &cpu_to_phys_group);
+ cpu_map, &cpu_to_phys_group);
}
#ifdef CONFIG_NUMA
/* Set up node groups */
if (sched_group_allnodes)
init_sched_build_groups(sched_group_allnodes, *cpu_map,
- &cpu_to_allnodes_group);
+ cpu_map, &cpu_to_allnodes_group);
for (i = 0; i < MAX_NUMNODES; i++) {
/* Set up node groups */
init_numa_sched_groups_power(sched_group_nodes[i]);
if (sched_group_allnodes) {
- int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+ int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map);
struct sched_group *sg = &sched_group_allnodes[group];
init_numa_sched_groups_power(sg);
return 0;
+#ifdef CONFIG_NUMA
error:
free_sched_groups(cpu_map);
return -ENOMEM;
+#endif
}
/*
* Set up scheduler domains and groups. Callers must hold the hotplug lock.
void __init sched_init_smp(void)
{
+ cpumask_t non_isolated_cpus; /* online CPUs that are not in cpu_isolated_map */
+
lock_cpu_hotplug();
arch_init_sched_domains(&cpu_online_map);
+ cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map); /* online & ~isolated, computed under the hotplug lock */
+ if (cpus_empty(non_isolated_cpus)) /* every online CPU is isolated */
+ cpu_set(smp_processor_id(), non_isolated_cpus); /* fall back to the CPU we are running on */
unlock_cpu_hotplug();
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);
+
+ /*
+  * Move init over to a non-isolated CPU: restrict the current task to
+  * the mask computed above.  A negative return from
+  * set_cpus_allowed() is treated as fatal.
+  */
+ if (set_cpus_allowed(current, non_isolated_cpus) < 0)
+ BUG();
}
#else
void __init sched_init_smp(void)