Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
926e5392 AV |
17 | #include <linux/module.h> |
18 | #include <linux/seq_file.h> | |
926e5392 AV |
19 | |
20 | #include <asm/pgtable.h> | |
21 | ||
22 | /* | |
23 | * The dumper groups pagetable entries of the same type into one, and for | |
24 | * that it needs to keep some state when walking, and flush this state | |
25 | * when a "break" in the continuity is found. | |
26 | */ | |
27 | struct pg_state { | |
28 | int level; | |
29 | pgprot_t current_prot; | |
30 | unsigned long start_address; | |
31 | unsigned long current_address; | |
fe770bf0 | 32 | const struct addr_marker *marker; |
926e5392 AV |
33 | }; |
34 | ||
fe770bf0 PA |
/*
 * One named landmark in the virtual address space.  A banner carrying
 * @name is printed once the walk reaches @start_address.
 */
struct addr_marker {
	unsigned long start_address;	/* first address of the region */
	const char *name;		/* banner text; NULL ends the list */
};
39 | ||
40 | /* Address space markers hints */ | |
41 | static struct addr_marker address_markers[] = { | |
42 | { 0, "User Space" }, | |
43 | #ifdef CONFIG_X86_64 | |
44 | { 0x8000000000000000UL, "Kernel Space" }, | |
684eb016 | 45 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
fe770bf0 | 46 | { VMALLOC_START, "vmalloc() Area" }, |
fe770bf0 PA |
47 | { VMEMMAP_START, "Vmemmap" }, |
48 | { __START_KERNEL_map, "High Kernel Mapping" }, | |
9a79cf9c YL |
49 | { MODULES_VADDR, "Modules" }, |
50 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
51 | #else |
52 | { PAGE_OFFSET, "Kernel Mapping" }, | |
53 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
54 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
55 | # ifdef CONFIG_HIGHMEM | |
56 | { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, | |
57 | # endif | |
58 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
59 | #endif | |
60 | { -1, NULL } /* End of list */ | |
61 | }; | |
926e5392 | 62 | |
fe770bf0 PA |
/*
 * Bytes of virtual address space covered by a single entry at each
 * level.  Every level is the level below it multiplied by the number
 * of entries held in one table of that lower level.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
926e5392 AV |
68 | |
69 | /* | |
70 | * Print a readable form of a pgprot_t to the seq_file | |
71 | */ | |
72 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level) | |
73 | { | |
fe770bf0 PA |
74 | pgprotval_t pr = pgprot_val(prot); |
75 | static const char * const level_name[] = | |
76 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
77 | ||
78 | if (!pgprot_val(prot)) { | |
79 | /* Not present */ | |
80 | seq_printf(m, " "); | |
81 | } else { | |
82 | if (pr & _PAGE_USER) | |
83 | seq_printf(m, "USR "); | |
926e5392 AV |
84 | else |
85 | seq_printf(m, " "); | |
fe770bf0 PA |
86 | if (pr & _PAGE_RW) |
87 | seq_printf(m, "RW "); | |
88 | else | |
89 | seq_printf(m, "ro "); | |
90 | if (pr & _PAGE_PWT) | |
91 | seq_printf(m, "PWT "); | |
92 | else | |
93 | seq_printf(m, " "); | |
94 | if (pr & _PAGE_PCD) | |
95 | seq_printf(m, "PCD "); | |
926e5392 AV |
96 | else |
97 | seq_printf(m, " "); | |
fe770bf0 PA |
98 | |
99 | /* Bit 9 has a different meaning on level 3 vs 4 */ | |
100 | if (level <= 3) { | |
101 | if (pr & _PAGE_PSE) | |
102 | seq_printf(m, "PSE "); | |
103 | else | |
104 | seq_printf(m, " "); | |
105 | } else { | |
106 | if (pr & _PAGE_PAT) | |
107 | seq_printf(m, "pat "); | |
108 | else | |
109 | seq_printf(m, " "); | |
110 | } | |
111 | if (pr & _PAGE_GLOBAL) | |
112 | seq_printf(m, "GLB "); | |
113 | else | |
114 | seq_printf(m, " "); | |
115 | if (pr & _PAGE_NX) | |
116 | seq_printf(m, "NX "); | |
117 | else | |
118 | seq_printf(m, "x "); | |
926e5392 | 119 | } |
fe770bf0 | 120 | seq_printf(m, "%s\n", level_name[level]); |
926e5392 AV |
121 | } |
122 | ||
/*
 * Turn an address computed from table indices into the value to report.
 *
 * On 64 bits the low 48 bits are sign-extended to the full 64 bit
 * width: shifting left drops the upper 16 bits, the signed shift back
 * replicates bit 47 into them.  On 32 bits the value is returned as is.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	signed long shifted = (signed long)(u << 16);

	return shifted >> 16;
#else
	return u;
#endif
}
134 | ||
135 | /* | |
136 | * This function gets called on a break in a continuous series | |
137 | * of PTE entries; the next one is different so we need to | |
138 | * print what we collected so far. | |
139 | */ | |
140 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 141 | pgprot_t new_prot, int level) |
926e5392 | 142 | { |
fe770bf0 PA |
143 | pgprotval_t prot, cur; |
144 | static const char units[] = "KMGTPE"; | |
926e5392 AV |
145 | |
146 | /* | |
147 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
148 | * we have now. "break" is either changing perms, levels or |
149 | * address space marker. | |
926e5392 | 150 | */ |
27990eac JF |
151 | prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; |
152 | cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; | |
926e5392 | 153 | |
fe770bf0 PA |
154 | if (!st->level) { |
155 | /* First entry */ | |
156 | st->current_prot = new_prot; | |
157 | st->level = level; | |
158 | st->marker = address_markers; | |
159 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | |
160 | } else if (prot != cur || level != st->level || | |
161 | st->current_address >= st->marker[1].start_address) { | |
162 | const char *unit = units; | |
926e5392 AV |
163 | unsigned long delta; |
164 | ||
926e5392 AV |
165 | /* |
166 | * Now print the actual finished series | |
167 | */ | |
fe770bf0 PA |
168 | seq_printf(m, "0x%p-0x%p ", |
169 | (void *)st->start_address, | |
170 | (void *)st->current_address); | |
926e5392 AV |
171 | |
172 | delta = (st->current_address - st->start_address) >> 10; | |
fe770bf0 PA |
173 | while (!(delta & 1023) && unit[1]) { |
174 | delta >>= 10; | |
175 | unit++; | |
926e5392 | 176 | } |
fe770bf0 PA |
177 | seq_printf(m, "%9lu%c ", delta, *unit); |
178 | printk_prot(m, st->current_prot, st->level); | |
179 | ||
180 | /* | |
181 | * We print markers for special areas of address space, | |
182 | * such as the start of vmalloc space etc. | |
183 | * This helps in the interpretation. | |
184 | */ | |
185 | if (st->current_address >= st->marker[1].start_address) { | |
186 | st->marker++; | |
187 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | |
926e5392 | 188 | } |
fe770bf0 | 189 | |
926e5392 AV |
190 | st->start_address = st->current_address; |
191 | st->current_prot = new_prot; | |
192 | st->level = level; | |
fe770bf0 | 193 | } |
926e5392 AV |
194 | } |
195 | ||
fe770bf0 | 196 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
197 | unsigned long P) |
198 | { | |
199 | int i; | |
200 | pte_t *start; | |
201 | ||
202 | start = (pte_t *) pmd_page_vaddr(addr); | |
203 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
204 | pgprot_t prot = pte_pgprot(*start); | |
205 | ||
fe770bf0 | 206 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
926e5392 AV |
207 | note_page(m, st, prot, 4); |
208 | start++; | |
209 | } | |
210 | } | |
211 | ||
#if PTRS_PER_PMD > 1

/*
 * Report every entry of the PMD table referenced by the PUD entry @addr.
 *
 * Empty entries are reported with a protection of 0.  Large or
 * non-present entries are reported at this level with their own flag
 * bits.  Any other entry is followed down into its PTE table.
 *
 * @m:    seq_file to print to
 * @st:   walker state
 * @addr: PUD entry whose PMD table is walked
 * @P:    virtual address mapped by the first entry of that table
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   unsigned long P)
{
	pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
		unsigned long base = P + idx * PMD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			pgprotval_t prot = pmd_val(*pmd) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 3);
		} else {
			walk_pte_level(m, st, *pmd, base);
		}
	}
}

#else
/* Only one entry per PMD table: treat the PUD entry as a PMD entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 242 | |
fe770bf0 PA |
#if PTRS_PER_PUD > 1

/*
 * Report every entry of the PUD table referenced by the PGD entry @addr.
 *
 * Empty entries are reported with a protection of 0.  Large or
 * non-present entries are reported at this level with their own flag
 * bits.  Any other entry is followed down into the PMD level.
 *
 * @m:    seq_file to print to
 * @st:   walker state
 * @addr: PGD entry whose PUD table is walked
 * @P:    virtual address mapped by the first entry of that table
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++, pud++) {
		unsigned long base = P + idx * PUD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		if (pud_large(*pud) || !pud_present(*pud)) {
			pgprotval_t prot = pud_val(*pud) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 2);
		} else {
			walk_pmd_level(m, st, *pud, base);
		}
	}
}

#else
/* Only one entry per PUD table: treat the PGD entry as a PUD entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
275 | ||
276 | static void walk_pgd_level(struct seq_file *m) | |
926e5392 | 277 | { |
fe770bf0 | 278 | #ifdef CONFIG_X86_64 |
926e5392 | 279 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
280 | #else |
281 | pgd_t *start = swapper_pg_dir; | |
282 | #endif | |
926e5392 AV |
283 | int i; |
284 | struct pg_state st; | |
285 | ||
286 | memset(&st, 0, sizeof(st)); | |
926e5392 AV |
287 | |
288 | for (i = 0; i < PTRS_PER_PGD; i++) { | |
fe770bf0 PA |
289 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
290 | if (!pgd_none(*start)) { | |
77be1fab | 291 | pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; |
fe770bf0 PA |
292 | |
293 | if (pgd_large(*start) || !pgd_present(*start)) | |
294 | note_page(m, &st, __pgprot(prot), 1); | |
295 | else | |
296 | walk_pud_level(m, &st, *start, | |
297 | i * PGD_LEVEL_MULT); | |
298 | } else | |
926e5392 | 299 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 300 | |
926e5392 AV |
301 | start++; |
302 | } | |
fe770bf0 PA |
303 | |
304 | /* Flush out the last page */ | |
305 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
306 | note_page(m, &st, __pgprot(0), 0); | |
926e5392 AV |
307 | } |
308 | ||
/*
 * seq_file show callback: emit the complete page table dump.
 *
 * @m: seq_file to print to
 * @v: unused
 *
 * Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);

	return 0;
}
314 | ||
315 | static int ptdump_open(struct inode *inode, struct file *filp) | |
316 | { | |
317 | return single_open(filp, ptdump_show, NULL); | |
318 | } | |
319 | ||
320 | static const struct file_operations ptdump_fops = { | |
321 | .open = ptdump_open, | |
322 | .read = seq_read, | |
323 | .llseek = seq_lseek, | |
324 | .release = single_release, | |
325 | }; | |
326 | ||
a4928cff | 327 | static int pt_dump_init(void) |
926e5392 AV |
328 | { |
329 | struct dentry *pe; | |
330 | ||
fe770bf0 PA |
331 | #ifdef CONFIG_X86_32 |
332 | /* Not a compile-time constant on x86-32 */ | |
333 | address_markers[2].start_address = VMALLOC_START; | |
334 | address_markers[3].start_address = VMALLOC_END; | |
335 | # ifdef CONFIG_HIGHMEM | |
336 | address_markers[4].start_address = PKMAP_BASE; | |
337 | address_markers[5].start_address = FIXADDR_START; | |
338 | # else | |
339 | address_markers[4].start_address = FIXADDR_START; | |
340 | # endif | |
341 | #endif | |
342 | ||
926e5392 AV |
343 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, |
344 | &ptdump_fops); | |
345 | if (!pe) | |
346 | return -ENOMEM; | |
347 | ||
348 | return 0; | |
349 | } | |
350 | ||
351 | __initcall(pt_dump_init); | |
352 | MODULE_LICENSE("GPL"); | |
353 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | |
354 | MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); |