Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 2 Oct 2016 17:42:26 +0000 (10:42 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 2 Oct 2016 17:42:26 +0000 (10:42 -0700)
Pull sparc fixes from David Miller:

 1) Fix section mismatches in some builds, from Paul Gortmaker.

 2) Need to count huge zero page mappings when doing TSB sizing, from
    Mike Kravetz.

 3) Fix handing of cpu_possible_mask when nr_cpus module option is
    specified, from Atish Patra.

 4) Don't allocate irq stacks until nr_irqs has been processed, also
    from Atish Patra.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Fix non-SMP build.
  sparc64: Fix irq stack bootmem allocation.
  sparc64: Fix cpu_possible_mask if nr_cpus is set
  sparc64 mm: Fix more TSB sizing issues
  sparc64: fix section mismatch in find_numa_latencies_for_group

arch/sparc/include/asm/page_64.h
arch/sparc/include/asm/smp_64.h
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/smp_64.c
arch/sparc/mm/fault_64.c
arch/sparc/mm/init_64.c
arch/sparc/mm/tlb.c
arch/sparc/mm/tsb.c

index 8c2a8c937540ffebb206576d06334bb8ba1a2438..c1263fc390db6d2f1672c9380d5a63d9b881d066 100644 (file)
@@ -25,6 +25,7 @@
 #define HPAGE_MASK             (~(HPAGE_SIZE - 1UL))
 #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define REAL_HPAGE_PER_HPAGE   (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
 #endif
 
 #ifndef __ASSEMBLY__
index 26d9e77268673601f8809537b5d14aa3642bb334..ce2233f7e662d7f85f5a32ea45339f0eda67e19c 100644 (file)
@@ -43,6 +43,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
+void smp_fill_in_cpu_possible_map(void);
 void smp_fill_in_sib_core_maps(void);
 void cpu_play_dead(void);
 
@@ -72,6 +73,7 @@ void __cpu_die(unsigned int cpu);
 #define smp_fill_in_sib_core_maps() do { } while (0)
 #define smp_fetch_global_regs() do { } while (0)
 #define smp_fetch_global_pmu() do { } while (0)
+#define smp_fill_in_cpu_possible_map() do { } while (0)
 
 #endif /* !(CONFIG_SMP) */
 
index 599f1207eed2e469157e2475631d2537543b5794..6b7331d198e9d8b9b0596acbfdab443d227682a3 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/start_kernel.h>
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -50,6 +51,8 @@
 #include <asm/elf.h>
 #include <asm/mdesc.h>
 #include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/irq.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
@@ -590,6 +593,22 @@ static void __init init_sparc64_elf_hwcap(void)
                pause_patch();
 }
 
+void __init alloc_irqstack_bootmem(void)
+{
+       unsigned int i, node;
+
+       for_each_possible_cpu(i) {
+               node = cpu_to_node(i);
+
+               softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+                                                       THREAD_SIZE,
+                                                       THREAD_SIZE, 0);
+               hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+                                                       THREAD_SIZE,
+                                                       THREAD_SIZE, 0);
+       }
+}
+
 void __init setup_arch(char **cmdline_p)
 {
        /* Initialize PROM console and command line. */
@@ -650,6 +669,13 @@ void __init setup_arch(char **cmdline_p)
 
        paging_init();
        init_sparc64_elf_hwcap();
+       smp_fill_in_cpu_possible_map();
+       /*
+        * Once the OF device tree and MDESC have been setup and nr_cpus has
+        * been parsed, we know the list of possible cpus.  Therefore we can
+        * allocate the IRQ stacks.
+        */
+       alloc_irqstack_bootmem();
 }
 
 extern int stop_a_enabled;
index 8a6151a628ce9bd6b5de8a3b60f8c7e3911d6aa7..d3035ba6cd3181fb2ada3b4f6bbdcf80422a02a8 100644 (file)
@@ -1227,6 +1227,20 @@ void __init smp_setup_processor_id(void)
                xcall_deliver_impl = hypervisor_xcall_deliver;
 }
 
+void __init smp_fill_in_cpu_possible_map(void)
+{
+       int possible_cpus = num_possible_cpus();
+       int i;
+
+       if (possible_cpus > nr_cpu_ids)
+               possible_cpus = nr_cpu_ids;
+
+       for (i = 0; i < possible_cpus; i++)
+               set_cpu_possible(i, true);
+       for (; i < NR_CPUS; i++)
+               set_cpu_possible(i, false);
+}
+
 void smp_fill_in_sib_core_maps(void)
 {
        unsigned int i;
index e16fdd28a93159ccd5ade26584e9cfc212fedb94..3f291d8c57f797214ba5e1b97da75868da6189dd 100644 (file)
@@ -484,6 +484,7 @@ good_area:
                tsb_grow(mm, MM_TSB_BASE, mm_rss);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
+       mm_rss *= REAL_HPAGE_PER_HPAGE;
        if (unlikely(mm_rss >
                     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
                if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
index 65457c9f1365f07b0b9d9337c897983234d85652..7ac6b62fb7c100a298f84f2d41b558d0dc74c317 100644 (file)
@@ -1160,7 +1160,7 @@ int __node_distance(int from, int to)
        return numa_latency[from][to];
 }
 
-static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 {
        int i;
 
@@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
        return i;
 }
 
-static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp,
-                                         int index)
+static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
+                                                u64 grp, int index)
 {
        u64 arc;
 
@@ -2081,7 +2081,6 @@ void __init paging_init(void)
 {
        unsigned long end_pfn, shift, phys_base;
        unsigned long real_end, i;
-       int node;
 
        setup_page_offset();
 
@@ -2250,21 +2249,6 @@ void __init paging_init(void)
        /* Setup bootmem... */
        last_valid_pfn = end_pfn = bootmem_init(phys_base);
 
-       /* Once the OF device tree and MDESC have been setup, we know
-        * the list of possible cpus.  Therefore we can allocate the
-        * IRQ stacks.
-        */
-       for_each_possible_cpu(i) {
-               node = cpu_to_node(i);
-
-               softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-                                                       THREAD_SIZE,
-                                                       THREAD_SIZE, 0);
-               hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-                                                       THREAD_SIZE,
-                                                       THREAD_SIZE, 0);
-       }
-
        kernel_physical_mapping_init();
 
        {
index 3659d37b4d818e30c614f46cf4b2aca8bf700aa0..c56a195c90719fc3eb1400ad14e6b3ae27bb8417 100644 (file)
@@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                return;
 
        if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
-               if (pmd_val(pmd) & _PAGE_PMD_HUGE)
-                       mm->context.thp_pte_count++;
-               else
-                       mm->context.thp_pte_count--;
+               /*
+                * Note that this routine only sets pmds for THP pages.
+                * Hugetlb pages are handled elsewhere.  We need to check
+                * for huge zero page.  Huge zero pages are like hugetlb
+                * pages in that there is no RSS, but there is the need
+                * for TSB entries.  So, huge zero page counts go into
+                * hugetlb_pte_count.
+                */
+               if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+                       if (is_huge_zero_page(pmd_page(pmd)))
+                               mm->context.hugetlb_pte_count++;
+                       else
+                               mm->context.thp_pte_count++;
+               } else {
+                       if (is_huge_zero_page(pmd_page(orig)))
+                               mm->context.hugetlb_pte_count--;
+                       else
+                               mm->context.thp_pte_count--;
+               }
 
                /* Do not try to allocate the TSB hash table if we
                 * don't have one already.  We have various locks held
@@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
        }
 }
 
+/*
+ * This routine is only called when splitting a THP
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                     pmd_t *pmdp)
 {
@@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 
        set_pmd_at(vma->vm_mm, address, pmdp, entry);
        flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+       /*
+        * set_pmd_at() will not be called in a way to decrement
+        * thp_pte_count when splitting a THP, so do it now.
+        * Sanity check pmd before doing the actual decrement.
+        */
+       if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+           !is_huge_zero_page(pmd_page(entry)))
+               (vma->vm_mm)->context.thp_pte_count--;
 }
 
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
index 6725ed45580e525cf5567b1b9ccbeb2c723738bf..f2b77112e9d8bb50f4f05346e955fb4c5f6746a9 100644 (file)
@@ -469,8 +469,10 @@ retry_tsb_alloc:
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
+       unsigned long mm_rss = get_mm_rss(mm);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       unsigned long total_huge_pte_count;
+       unsigned long saved_hugetlb_pte_count;
+       unsigned long saved_thp_pte_count;
 #endif
        unsigned int i;
 
@@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
         * will re-increment the counters as the parent PTEs are
         * copied into the child address space.
         */
-       total_huge_pte_count = mm->context.hugetlb_pte_count +
-                        mm->context.thp_pte_count;
+       saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
+       saved_thp_pte_count = mm->context.thp_pte_count;
        mm->context.hugetlb_pte_count = 0;
        mm->context.thp_pte_count = 0;
+
+       mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
 #endif
 
        /* copy_mm() copies over the parent's mm_struct before calling
@@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
        /* If this is fork, inherit the parent's TSB size.  We would
         * grow it to that size on the first page fault anyways.
         */
-       tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+       tsb_grow(mm, MM_TSB_BASE, mm_rss);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       if (unlikely(total_huge_pte_count))
-               tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
+       if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
+               tsb_grow(mm, MM_TSB_HUGE,
+                        (saved_hugetlb_pte_count + saved_thp_pte_count) *
+                        REAL_HPAGE_PER_HPAGE);
 #endif
 
        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
This page took 0.029953 seconds and 5 git commands to generate.