Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
926e5392 AV |
17 | #include <linux/module.h> |
18 | #include <linux/seq_file.h> | |
926e5392 AV |
19 | |
20 | #include <asm/pgtable.h> | |
21 | ||
22 | /* | |
23 | * The dumper groups pagetable entries of the same type into one, and for | |
24 | * that it needs to keep some state when walking, and flush this state | |
25 | * when a "break" in the continuity is found. | |
26 | */ | |
27 | struct pg_state { | |
28 | int level; | |
29 | pgprot_t current_prot; | |
30 | unsigned long start_address; | |
31 | unsigned long current_address; | |
fe770bf0 | 32 | const struct addr_marker *marker; |
926e5392 AV |
33 | }; |
34 | ||
/* A named boundary in the address space, printed as a heading in the dump. */
struct addr_marker {
	unsigned long start_address;	/* first address of the region */
	const char *name;		/* heading text; NULL ends the list */
};
39 | ||
92851e2f AS |
40 | /* indices for address_markers; keep sync'd w/ address_markers below */ |
41 | enum address_markers_idx { | |
42 | USER_SPACE_NR = 0, | |
43 | #ifdef CONFIG_X86_64 | |
44 | KERNEL_SPACE_NR, | |
45 | LOW_KERNEL_NR, | |
46 | VMALLOC_START_NR, | |
47 | VMEMMAP_START_NR, | |
48 | HIGH_KERNEL_NR, | |
49 | MODULES_VADDR_NR, | |
50 | MODULES_END_NR, | |
51 | #else | |
52 | KERNEL_SPACE_NR, | |
53 | VMALLOC_START_NR, | |
54 | VMALLOC_END_NR, | |
55 | # ifdef CONFIG_HIGHMEM | |
56 | PKMAP_BASE_NR, | |
57 | # endif | |
58 | FIXADDR_START_NR, | |
59 | #endif | |
60 | }; | |
61 | ||
fe770bf0 PA |
62 | /* Address space markers hints */ |
63 | static struct addr_marker address_markers[] = { | |
64 | { 0, "User Space" }, | |
65 | #ifdef CONFIG_X86_64 | |
66 | { 0x8000000000000000UL, "Kernel Space" }, | |
684eb016 | 67 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
fe770bf0 | 68 | { VMALLOC_START, "vmalloc() Area" }, |
fe770bf0 PA |
69 | { VMEMMAP_START, "Vmemmap" }, |
70 | { __START_KERNEL_map, "High Kernel Mapping" }, | |
9a79cf9c YL |
71 | { MODULES_VADDR, "Modules" }, |
72 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
73 | #else |
74 | { PAGE_OFFSET, "Kernel Mapping" }, | |
75 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
76 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
77 | # ifdef CONFIG_HIGHMEM | |
78 | { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, | |
79 | # endif | |
80 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
81 | #endif | |
82 | { -1, NULL } /* End of list */ | |
83 | }; | |
926e5392 | 84 | |
/*
 * Address stride between consecutive entries at each page-table level.
 * Each level's stride is the stride of the level below multiplied by
 * the number of entries in a table of that lower level.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
926e5392 AV |
90 | |
91 | /* | |
92 | * Print a readable form of a pgprot_t to the seq_file | |
93 | */ | |
94 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level) | |
95 | { | |
fe770bf0 PA |
96 | pgprotval_t pr = pgprot_val(prot); |
97 | static const char * const level_name[] = | |
98 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
99 | ||
100 | if (!pgprot_val(prot)) { | |
101 | /* Not present */ | |
102 | seq_printf(m, " "); | |
103 | } else { | |
104 | if (pr & _PAGE_USER) | |
105 | seq_printf(m, "USR "); | |
926e5392 AV |
106 | else |
107 | seq_printf(m, " "); | |
fe770bf0 PA |
108 | if (pr & _PAGE_RW) |
109 | seq_printf(m, "RW "); | |
110 | else | |
111 | seq_printf(m, "ro "); | |
112 | if (pr & _PAGE_PWT) | |
113 | seq_printf(m, "PWT "); | |
114 | else | |
115 | seq_printf(m, " "); | |
116 | if (pr & _PAGE_PCD) | |
117 | seq_printf(m, "PCD "); | |
926e5392 AV |
118 | else |
119 | seq_printf(m, " "); | |
fe770bf0 PA |
120 | |
121 | /* Bit 9 has a different meaning on level 3 vs 4 */ | |
122 | if (level <= 3) { | |
123 | if (pr & _PAGE_PSE) | |
124 | seq_printf(m, "PSE "); | |
125 | else | |
126 | seq_printf(m, " "); | |
127 | } else { | |
128 | if (pr & _PAGE_PAT) | |
129 | seq_printf(m, "pat "); | |
130 | else | |
131 | seq_printf(m, " "); | |
132 | } | |
133 | if (pr & _PAGE_GLOBAL) | |
134 | seq_printf(m, "GLB "); | |
135 | else | |
136 | seq_printf(m, " "); | |
137 | if (pr & _PAGE_NX) | |
138 | seq_printf(m, "NX "); | |
139 | else | |
140 | seq_printf(m, "x "); | |
926e5392 | 141 | } |
fe770bf0 | 142 | seq_printf(m, "%s\n", level_name[level]); |
926e5392 AV |
143 | } |
144 | ||
/*
 * Turn a raw walk offset into a canonical virtual address.
 *
 * On 64 bits the low 48 bits are sign-extended to the full 64 bits by
 * shifting them to the top of the word and arithmetically shifting back;
 * on 32 bits the value is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	signed long shifted = (signed long)(u << 16);

	return shifted >> 16;
#else
	return u;
#endif
}
156 | ||
157 | /* | |
158 | * This function gets called on a break in a continuous series | |
159 | * of PTE entries; the next one is different so we need to | |
160 | * print what we collected so far. | |
161 | */ | |
162 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 163 | pgprot_t new_prot, int level) |
926e5392 | 164 | { |
fe770bf0 PA |
165 | pgprotval_t prot, cur; |
166 | static const char units[] = "KMGTPE"; | |
926e5392 AV |
167 | |
168 | /* | |
169 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
170 | * we have now. "break" is either changing perms, levels or |
171 | * address space marker. | |
926e5392 | 172 | */ |
27990eac JF |
173 | prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; |
174 | cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; | |
926e5392 | 175 | |
fe770bf0 PA |
176 | if (!st->level) { |
177 | /* First entry */ | |
178 | st->current_prot = new_prot; | |
179 | st->level = level; | |
180 | st->marker = address_markers; | |
181 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | |
182 | } else if (prot != cur || level != st->level || | |
183 | st->current_address >= st->marker[1].start_address) { | |
184 | const char *unit = units; | |
926e5392 | 185 | unsigned long delta; |
6424fb38 | 186 | int width = sizeof(unsigned long) * 2; |
926e5392 | 187 | |
926e5392 AV |
188 | /* |
189 | * Now print the actual finished series | |
190 | */ | |
6424fb38 YL |
191 | seq_printf(m, "0x%0*lx-0x%0*lx ", |
192 | width, st->start_address, | |
193 | width, st->current_address); | |
926e5392 AV |
194 | |
195 | delta = (st->current_address - st->start_address) >> 10; | |
fe770bf0 PA |
196 | while (!(delta & 1023) && unit[1]) { |
197 | delta >>= 10; | |
198 | unit++; | |
926e5392 | 199 | } |
fe770bf0 PA |
200 | seq_printf(m, "%9lu%c ", delta, *unit); |
201 | printk_prot(m, st->current_prot, st->level); | |
202 | ||
203 | /* | |
204 | * We print markers for special areas of address space, | |
205 | * such as the start of vmalloc space etc. | |
206 | * This helps in the interpretation. | |
207 | */ | |
208 | if (st->current_address >= st->marker[1].start_address) { | |
209 | st->marker++; | |
210 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | |
926e5392 | 211 | } |
fe770bf0 | 212 | |
926e5392 AV |
213 | st->start_address = st->current_address; |
214 | st->current_prot = new_prot; | |
215 | st->level = level; | |
fe770bf0 | 216 | } |
926e5392 AV |
217 | } |
218 | ||
fe770bf0 | 219 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
220 | unsigned long P) |
221 | { | |
222 | int i; | |
223 | pte_t *start; | |
224 | ||
225 | start = (pte_t *) pmd_page_vaddr(addr); | |
226 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
227 | pgprot_t prot = pte_pgprot(*start); | |
228 | ||
fe770bf0 | 229 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
926e5392 AV |
230 | note_page(m, st, prot, 4); |
231 | start++; | |
232 | } | |
233 | } | |
234 | ||
#if PTRS_PER_PMD > 1

/*
 * Walk the PMD table that the PUD entry @addr points to.  Empty, large
 * and not-present entries are noted at level 3; everything else is
 * descended into.  @P is the un-normalized address of the first entry.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   unsigned long P)
{
	pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
		unsigned long va = P + idx * PMD_LEVEL_MULT;

		st->current_address = normalize_addr(va);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			pgprotval_t prot = pmd_val(*pmd) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 3);
		} else {
			walk_pte_level(m, st, *pmd, va);
		}
	}
}

#else
/* No separate PMD level: treat the PUD entry as if it were a PMD entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 265 | |
#if PTRS_PER_PUD > 1

/*
 * Walk the PUD table that the PGD entry @addr points to.  Empty, large
 * and not-present entries are noted at level 2; everything else is
 * descended into.  @P is the un-normalized address of the first entry.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++, pud++) {
		unsigned long va = P + idx * PUD_LEVEL_MULT;

		st->current_address = normalize_addr(va);

		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		if (pud_large(*pud) || !pud_present(*pud)) {
			pgprotval_t prot = pud_val(*pud) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 2);
		} else {
			walk_pmd_level(m, st, *pud, va);
		}
	}
}

#else
/* No separate PUD level: treat the PGD entry as if it were a PUD entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif
298 | ||
299 | static void walk_pgd_level(struct seq_file *m) | |
926e5392 | 300 | { |
fe770bf0 | 301 | #ifdef CONFIG_X86_64 |
926e5392 | 302 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
303 | #else |
304 | pgd_t *start = swapper_pg_dir; | |
305 | #endif | |
926e5392 AV |
306 | int i; |
307 | struct pg_state st; | |
308 | ||
309 | memset(&st, 0, sizeof(st)); | |
926e5392 AV |
310 | |
311 | for (i = 0; i < PTRS_PER_PGD; i++) { | |
fe770bf0 PA |
312 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
313 | if (!pgd_none(*start)) { | |
77be1fab | 314 | pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; |
fe770bf0 PA |
315 | |
316 | if (pgd_large(*start) || !pgd_present(*start)) | |
317 | note_page(m, &st, __pgprot(prot), 1); | |
318 | else | |
319 | walk_pud_level(m, &st, *start, | |
320 | i * PGD_LEVEL_MULT); | |
321 | } else | |
926e5392 | 322 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 323 | |
926e5392 AV |
324 | start++; |
325 | } | |
fe770bf0 PA |
326 | |
327 | /* Flush out the last page */ | |
328 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
329 | note_page(m, &st, __pgprot(0), 0); | |
926e5392 AV |
330 | } |
331 | ||
/* seq_file show callback: write the complete page-table dump to @m. */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);

	/* The walk itself reports no errors */
	return 0;
}
337 | ||
338 | static int ptdump_open(struct inode *inode, struct file *filp) | |
339 | { | |
340 | return single_open(filp, ptdump_show, NULL); | |
341 | } | |
342 | ||
343 | static const struct file_operations ptdump_fops = { | |
344 | .open = ptdump_open, | |
345 | .read = seq_read, | |
346 | .llseek = seq_lseek, | |
347 | .release = single_release, | |
348 | }; | |
349 | ||
a4928cff | 350 | static int pt_dump_init(void) |
926e5392 AV |
351 | { |
352 | struct dentry *pe; | |
353 | ||
fe770bf0 PA |
354 | #ifdef CONFIG_X86_32 |
355 | /* Not a compile-time constant on x86-32 */ | |
92851e2f AS |
356 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
357 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 358 | # ifdef CONFIG_HIGHMEM |
92851e2f | 359 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 360 | # endif |
92851e2f | 361 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
fe770bf0 PA |
362 | #endif |
363 | ||
926e5392 AV |
364 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, |
365 | &ptdump_fops); | |
366 | if (!pe) | |
367 | return -ENOMEM; | |
368 | ||
369 | return 0; | |
370 | } | |
371 | ||
372 | __initcall(pt_dump_init); | |
373 | MODULE_LICENSE("GPL"); | |
374 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | |
375 | MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); |