/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The logical flat p2m table is mapped to a linear kernel memory area.
 * For accesses by Xen a three-level tree linked via mfns only is set up to
 * allow the address space to be sparse.
 *
 *                  Xen
 *                   |
 *              p2m_top_mfn
 *                /      \
 *      p2m_mid_mfn    p2m_mid_mfn
 *           /              /
 *    p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top_mfn level is limited to 1 page, so the maximum representable
 * pseudo-physical address space is:
 *      P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as a mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
 *
 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
 *
 * However, not all entries are filled with MFNs. Any leaf entry, or any
 * top- or mid-level entry, which is void is assumed to be "missing". So
 * (for example) pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
 * We have a dedicated page p2m_missing with all entries being
 * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
 * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
 *
 * We also have the possibility of setting 1-1 mappings on certain regions, so
 * that:
 *      pfn_to_mfn(0xc0000)=0xc0000
 *
 * The benefit of this is that for non-RAM regions (think PCI BARs, or
 * ACPI spaces) we can create mappings easily, because we get the PFN
 * value to match the MFN.
 *
 * For this to work efficiently we have one new page p2m_identity. All entries
 * in p2m_identity are set to INVALID_P2M_ENTRY (the Xen toolstack only
 * recognizes that value and real MFNs, no other fancy values).
 *
 * On lookup we spot that the entry points to p2m_identity and return the
 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
 * If the entry points to an allocated page, we just proceed as before and
 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
 * the appropriate functions (pfn_to_mfn).
 *
 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
 * non-identity pfn. To protect ourselves against this we elect to set (and
 * get) the IDENTITY_FRAME_BIT on all identity mapped PFNs.
 */
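
/*
 * Illustrative sketch (not part of the original file): the identity
 * round-trip described above, as seen from a caller. This assumes the
 * usual definition IDENTITY_FRAME(pfn) == ((pfn) | IDENTITY_FRAME_BIT)
 * from asm/xen/page.h:
 *
 *      unsigned long mfn = get_phys_to_machine(pfn);
 *
 *      if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT))
 *              mfn &= ~IDENTITY_FRAME_BIT;     // now mfn == pfn
 */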

#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/bootmem.h>
#include <linux/slab.h>

#include <asm/cache.h>
#include <asm/setup.h>
#include <asm/uaccess.h>

#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <xen/balloon.h>
#include <xen/grant_table.h>

#include "p2m.h"
#include "multicalls.h"
#include "xen-ops.h"

#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE)

unsigned long *xen_p2m_addr __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_addr);
unsigned long xen_p2m_size __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_size);
unsigned long xen_max_p2m_pfn __read_mostly;
EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
#else
#define P2M_LIMIT 0
#endif

static DEFINE_SPINLOCK(p2m_update_lock);

static unsigned long *p2m_mid_missing_mfn;
static unsigned long *p2m_top_mfn;
static unsigned long **p2m_top_mfn_p;
static unsigned long *p2m_missing;
static unsigned long *p2m_identity;
static pte_t *p2m_missing_pte;
static pte_t *p2m_identity_pte;

static inline unsigned p2m_top_index(unsigned long pfn)
{
        BUG_ON(pfn >= MAX_P2M_PFN);
        return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
        return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
        return pfn % P2M_PER_PAGE;
}

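/*
 * Example (illustrative, not in the original source): on x86-64 with
 * 4K pages, P2M_PER_PAGE == P2M_MID_PER_PAGE == 512, so a pfn splits
 * into its three tree coordinates as:
 *
 *      topidx = p2m_top_index(pfn);    // pfn / (512 * 512)
 *      mididx = p2m_mid_index(pfn);    // (pfn / 512) % 512
 *      idx    = p2m_index(pfn);        // pfn % 512
 *
 * Conceptually, the Xen-visible mfn tree is then walked as (ignoring
 * the missing/identity special cases):
 *
 *      unsigned long *leaf = mfn_to_virt(p2m_top_mfn_p[topidx][mididx]);
 *      unsigned long mfn   = leaf[idx];
 */
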
static void p2m_top_mfn_init(unsigned long *top)
{
        unsigned i;

        for (i = 0; i < P2M_TOP_PER_PAGE; i++)
                top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
        unsigned i;

        for (i = 0; i < P2M_TOP_PER_PAGE; i++)
                top[i] = p2m_mid_missing_mfn;
}

static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{
        unsigned i;

        for (i = 0; i < P2M_MID_PER_PAGE; i++)
                mid[i] = virt_to_mfn(leaf);
}

static void p2m_init(unsigned long *p2m)
{
        unsigned i;

        for (i = 0; i < P2M_PER_PAGE; i++)
                p2m[i] = INVALID_P2M_ENTRY;
}

static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
{
        unsigned i;

        for (i = 0; i < P2M_PER_PAGE; i++)
                p2m[i] = IDENTITY_FRAME(pfn + i);
}

static void * __ref alloc_p2m_page(void)
{
        if (unlikely(!slab_is_available()))
                return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);

        return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
}

static void __ref free_p2m_page(void *p)
{
        if (unlikely(!slab_is_available())) {
                free_bootmem((unsigned long)p, PAGE_SIZE);
                return;
        }

        free_page((unsigned long)p);
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called rather early, and must use alloc_bootmem*()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void __ref xen_build_mfn_list_list(void)
{
        unsigned long pfn, mfn;
        pte_t *ptep;
        unsigned int level, topidx, mididx;
        unsigned long *mid_mfn_p;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return;

        /* Pre-initialize p2m_top_mfn to be completely missing */
        if (p2m_top_mfn == NULL) {
                p2m_mid_missing_mfn = alloc_p2m_page();
                p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);

                p2m_top_mfn_p = alloc_p2m_page();
                p2m_top_mfn_p_init(p2m_top_mfn_p);

                p2m_top_mfn = alloc_p2m_page();
                p2m_top_mfn_init(p2m_top_mfn);
        } else {
                /* Reinitialise: mfns all change after migration */
                p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
        }

        for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
             pfn += P2M_PER_PAGE) {
                topidx = p2m_top_index(pfn);
                mididx = p2m_mid_index(pfn);

                mid_mfn_p = p2m_top_mfn_p[topidx];
                ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
                                      &level);
                BUG_ON(!ptep || level != PG_LEVEL_4K);
                mfn = pte_mfn(*ptep);
                ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

                /* Don't bother allocating any mfn mid levels if
                 * they're just missing, just update the stored mfn,
                 * since all could have changed over a migrate.
                 */
                if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
                        BUG_ON(mididx);
                        BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
                        p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
                        pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
                        continue;
                }

                if (mid_mfn_p == p2m_mid_missing_mfn) {
                        mid_mfn_p = alloc_p2m_page();
                        p2m_mid_mfn_init(mid_mfn_p, p2m_missing);

                        p2m_top_mfn_p[topidx] = mid_mfn_p;
                }

                p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
                mid_mfn_p[mididx] = mfn;
        }
}

void xen_setup_mfn_list_list(void)
{
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return;

        BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
                virt_to_mfn(p2m_top_mfn);
        HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
        unsigned long pfn;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return;

        xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
        xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);

        for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
                xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;

        xen_max_p2m_pfn = xen_p2m_size;
}

#define P2M_TYPE_IDENTITY 0
#define P2M_TYPE_MISSING 1
#define P2M_TYPE_PFN 2
#define P2M_TYPE_UNKNOWN 3

static int xen_p2m_elem_type(unsigned long pfn)
{
        unsigned long mfn;

        if (pfn >= xen_p2m_size)
                return P2M_TYPE_IDENTITY;

        mfn = xen_p2m_addr[pfn];

        if (mfn == INVALID_P2M_ENTRY)
                return P2M_TYPE_MISSING;

        if (mfn & IDENTITY_FRAME_BIT)
                return P2M_TYPE_IDENTITY;

        return P2M_TYPE_PFN;
}

static void __init xen_rebuild_p2m_list(unsigned long *p2m)
{
        unsigned int i, chunk;
        unsigned long pfn;
        unsigned long *mfns;
        pte_t *ptep;
        pmd_t *pmdp;
        int type;

        p2m_missing = alloc_p2m_page();
        p2m_init(p2m_missing);
        p2m_identity = alloc_p2m_page();
        p2m_init(p2m_identity);

        p2m_missing_pte = alloc_p2m_page();
        paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
        p2m_identity_pte = alloc_p2m_page();
        paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
        for (i = 0; i < PTRS_PER_PTE; i++) {
                set_pte(p2m_missing_pte + i,
                        pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
                set_pte(p2m_identity_pte + i,
                        pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
        }

        for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
                /*
                 * Try to map missing/identity PMDs or p2m-pages if possible.
                 * We have to respect the structure of the mfn_list_list
                 * which will be built just afterwards.
                 * Chunk size to test is one p2m page if we are in the middle
                 * of a mfn_list_list mid page and the complete mid page area
                 * if we are at index 0 of the mid page. Please note that a
                 * mid page might cover more than one PMD, e.g. on 32 bit PAE
                 * kernels.
                 */
                chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
                        P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;

                type = xen_p2m_elem_type(pfn);
                i = 0;
                if (type != P2M_TYPE_PFN)
                        for (i = 1; i < chunk; i++)
                                if (xen_p2m_elem_type(pfn + i) != type)
                                        break;
                if (i < chunk)
                        /* Reset to minimal chunk size. */
                        chunk = P2M_PER_PAGE;

                if (type == P2M_TYPE_PFN || i < chunk) {
                        /* Use initial p2m page contents. */
#ifdef CONFIG_X86_64
                        mfns = alloc_p2m_page();
                        copy_page(mfns, xen_p2m_addr + pfn);
#else
                        mfns = xen_p2m_addr + pfn;
#endif
                        ptep = populate_extra_pte((unsigned long)(p2m + pfn));
                        set_pte(ptep,
                                pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
                        continue;
                }

                if (chunk == P2M_PER_PAGE) {
                        /* Map complete missing or identity p2m-page. */
                        mfns = (type == P2M_TYPE_MISSING) ?
                                p2m_missing : p2m_identity;
                        ptep = populate_extra_pte((unsigned long)(p2m + pfn));
                        set_pte(ptep,
                                pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
                        continue;
                }

                /* Complete missing or identity PMD(s) can be mapped. */
                ptep = (type == P2M_TYPE_MISSING) ?
                        p2m_missing_pte : p2m_identity_pte;
                for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
                        pmdp = populate_extra_pmd(
                                (unsigned long)(p2m + pfn) + i * PMD_SIZE);
                        set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
                }
        }
}

void __init xen_vmalloc_p2m_tree(void)
{
        static struct vm_struct vm;
        unsigned long p2m_limit;

        p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
        vm.flags = VM_ALLOC;
        vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
                        PMD_SIZE * PMDS_PER_MID_PAGE);
        vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
        pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);

        xen_max_p2m_pfn = vm.size / sizeof(unsigned long);

        xen_rebuild_p2m_list(vm.addr);

        xen_p2m_addr = vm.addr;
        xen_p2m_size = xen_max_p2m_pfn;

        xen_inv_extra_mem();
}

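/*
 * Worked example (illustrative, not in the original source): with
 * CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT=512 (GB), 4K pages and a
 * limit that exceeds xen_max_p2m_pfn, xen_vmalloc_p2m_tree() computes:
 *
 *      p2m_limit = 512 GB / 4096      = 0x8000000 pfns
 *      vm.size   = 8 * 0x8000000      = 1 GB of p2m entries
 *
 * rounded up to PMD_SIZE * PMDS_PER_MID_PAGE so that every
 * mfn_list_list mid page covers only whole PMDs.
 */
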
unsigned long get_phys_to_machine(unsigned long pfn)
{
        pte_t *ptep;
        unsigned int level;

        if (unlikely(pfn >= xen_p2m_size)) {
                if (pfn < xen_max_p2m_pfn)
                        return xen_chk_extra_mem(pfn);

                return IDENTITY_FRAME(pfn);
        }

        ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
        BUG_ON(!ptep || level != PG_LEVEL_4K);

        /*
         * Both p2m_*identity and p2m_*missing are filled with
         * INVALID_P2M_ENTRY, so simply returning that value here
         * would lose the identity information.
         */
        if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
                return IDENTITY_FRAME(pfn);

        return xen_p2m_addr[pfn];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);

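/*
 * Illustrative note (not in the original source): once
 * xen_rebuild_p2m_list() has backed a 1:1 range with the shared
 * p2m_identity page, a flat read of xen_p2m_addr[pfn] there returns
 * INVALID_P2M_ENTRY (that is what p2m_identity is filled with); only
 * the backing-page check above recovers the identity value:
 *
 *      get_phys_to_machine(0xc0000);   // IDENTITY_FRAME(0xc0000)
 */
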
/*
 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
 * If not, nothing is changed. This is okay as the only reason for allocating
 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an
 * individual pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
 */
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{
        pte_t *ptechk;
        pte_t *pte_newpg[PMDS_PER_MID_PAGE];
        pmd_t *pmdp;
        unsigned int level;
        unsigned long flags;
        unsigned long vaddr;
        int i;

        /* Do all allocations first to bail out in error case. */
        for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
                pte_newpg[i] = alloc_p2m_page();
                if (!pte_newpg[i]) {
                        for (i--; i >= 0; i--)
                                free_p2m_page(pte_newpg[i]);

                        return NULL;
                }
        }

        vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);

        for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
                copy_page(pte_newpg[i], pte_pg);
                paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);

                pmdp = lookup_pmd_address(vaddr);
                BUG_ON(!pmdp);

                spin_lock_irqsave(&p2m_update_lock, flags);

                ptechk = lookup_address(vaddr, &level);
                if (ptechk == pte_pg) {
                        set_pmd(pmdp,
                                __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
                        pte_newpg[i] = NULL;
                }

                spin_unlock_irqrestore(&p2m_update_lock, flags);

                if (pte_newpg[i]) {
                        paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
                        free_p2m_page(pte_newpg[i]);
                }

                vaddr += PMD_SIZE;
        }

        return lookup_address(addr, &level);
}

/*
 * Fully allocate the p2m structure for a given pfn. We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync. We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
        unsigned topidx, mididx;
        unsigned long *top_mfn_p, *mid_mfn;
        pte_t *ptep, *pte_pg;
        unsigned int level;
        unsigned long flags;
        unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
        unsigned long p2m_pfn;

        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);

        ptep = lookup_address(addr, &level);
        BUG_ON(!ptep || level != PG_LEVEL_4K);
        pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

        if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
                /* PMD level is missing, allocate a new one */
                ptep = alloc_p2m_pmd(addr, pte_pg);
                if (!ptep)
                        return false;
        }

        if (p2m_top_mfn) {
                top_mfn_p = &p2m_top_mfn[topidx];
                mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);

                BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

                if (mid_mfn == p2m_mid_missing_mfn) {
                        /* Separately check the mid mfn level */
                        unsigned long missing_mfn;
                        unsigned long mid_mfn_mfn;
                        unsigned long old_mfn;

                        mid_mfn = alloc_p2m_page();
                        if (!mid_mfn)
                                return false;

                        p2m_mid_mfn_init(mid_mfn, p2m_missing);

                        missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
                        mid_mfn_mfn = virt_to_mfn(mid_mfn);
                        old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
                        if (old_mfn != missing_mfn) {
                                free_p2m_page(mid_mfn);
                                mid_mfn = mfn_to_virt(old_mfn);
                        } else {
                                p2m_top_mfn_p[topidx] = mid_mfn;
                        }
                }
        } else {
                mid_mfn = NULL;
        }

        p2m_pfn = pte_pfn(READ_ONCE(*ptep));
        if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
            p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
                /* p2m leaf page is missing */
                unsigned long *p2m;

                p2m = alloc_p2m_page();
                if (!p2m)
                        return false;

                if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
                        p2m_init(p2m);
                else
                        p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));

                spin_lock_irqsave(&p2m_update_lock, flags);

                if (pte_pfn(*ptep) == p2m_pfn) {
                        set_pte(ptep,
                                pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
                        if (mid_mfn)
                                mid_mfn[mididx] = virt_to_mfn(p2m);
                        p2m = NULL;
                }

                spin_unlock_irqrestore(&p2m_update_lock, flags);

                if (p2m)
                        free_p2m_page(p2m);
        }

        return true;
}

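/*
 * Illustrative sketch (not in the original source): the lose-the-race
 * idiom alloc_p2m() relies on, in isolation. Each racing CPU installs
 * its page with cmpxchg(); the loser frees its copy and adopts the
 * winner's:
 *
 *      old = cmpxchg(slot, missing_mfn, new_mfn);
 *      if (old != missing_mfn) {
 *              free_p2m_page(new_page);        // we lost the race
 *              new_page = mfn_to_virt(old);    // use the winner's page
 *      }
 */
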
unsigned long __init set_phys_range_identity(unsigned long pfn_s,
                                             unsigned long pfn_e)
{
        unsigned long pfn;

        if (unlikely(pfn_s >= xen_p2m_size))
                return 0;

        if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
                return pfn_e - pfn_s;

        if (pfn_s > pfn_e)
                return 0;

        if (pfn_e > xen_p2m_size)
                pfn_e = xen_p2m_size;

        for (pfn = pfn_s; pfn < pfn_e; pfn++)
                xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn);

        return pfn - pfn_s;
}

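/*
 * Usage sketch (illustrative, not in the original source): how the
 * Xen setup code might mark a legacy non-RAM hole, such as the
 * ISA/VGA region, as 1:1 mapped:
 *
 *      set_phys_range_identity(0xa0000 >> PAGE_SHIFT,
 *                              0x100000 >> PAGE_SHIFT);
 */
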
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
        pte_t *ptep;
        unsigned int level;

        /* don't track P2M changes in autotranslated guests */
        if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
                return true;

        if (unlikely(pfn >= xen_p2m_size)) {
                BUG_ON(mfn != INVALID_P2M_ENTRY);
                return true;
        }

        if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
                return true;

        ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
        BUG_ON(!ptep || level != PG_LEVEL_4K);

        if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
                return mfn == INVALID_P2M_ENTRY;

        if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
                return mfn == IDENTITY_FRAME(pfn);

        return false;
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
        if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
                if (!alloc_p2m(pfn))
                        return false;

                return __set_phys_to_machine(pfn, mfn);
        }

        return true;
}

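/*
 * Usage sketch (illustrative, not in the original source): a typical
 * caller recording a granted foreign frame; set_phys_to_machine()
 * transparently calls alloc_p2m() and retries if the leaf page was
 * still the shared missing/identity page:
 *
 *      if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
 *              return -ENOMEM; // p2m leaf could not be allocated
 */
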
int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
                            struct gnttab_map_grant_ref *kmap_ops,
                            struct page **pages, unsigned int count)
{
        int i, ret = 0;
        pte_t *pte;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        if (kmap_ops) {
                ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
                                                kmap_ops, count);
                if (ret)
                        goto out;
        }

        for (i = 0; i < count; i++) {
                unsigned long mfn, pfn;

                /* Do not add to override if the map failed. */
                if (map_ops[i].status)
                        continue;

                if (map_ops[i].flags & GNTMAP_contains_pte) {
                        pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
                                (map_ops[i].host_addr & ~PAGE_MASK));
                        mfn = pte_mfn(*pte);
                } else {
                        mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
                }
                pfn = page_to_pfn(pages[i]);

                WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");

                if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
                        ret = -ENOMEM;
                        goto out;
                }
        }

out:
        return ret;
}
EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);

int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
                              struct gnttab_unmap_grant_ref *kunmap_ops,
                              struct page **pages, unsigned int count)
{
        int i, ret = 0;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        for (i = 0; i < count; i++) {
                unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
                unsigned long pfn = page_to_pfn(pages[i]);

                if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
                        ret = -EINVAL;
                        goto out;
                }

                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        }
        if (kunmap_ops)
                ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                                                kunmap_ops, count);
out:
        return ret;
}
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);

#ifdef CONFIG_XEN_DEBUG_FS
#include <linux/debugfs.h>
#include "debugfs.h"
static int p2m_dump_show(struct seq_file *m, void *v)
{
        static const char * const type_name[] = {
                [P2M_TYPE_IDENTITY] = "identity",
                [P2M_TYPE_MISSING] = "missing",
                [P2M_TYPE_PFN] = "pfn",
                [P2M_TYPE_UNKNOWN] = "abnormal"};
        unsigned long pfn, first_pfn;
        int type, prev_type;

        prev_type = xen_p2m_elem_type(0);
        first_pfn = 0;

        for (pfn = 0; pfn < xen_p2m_size; pfn++) {
                type = xen_p2m_elem_type(pfn);
                if (type != prev_type) {
                        seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
                                   type_name[prev_type]);
                        prev_type = type;
                        first_pfn = pfn;
                }
        }
        seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
                   type_name[prev_type]);
        return 0;
}

static int p2m_dump_open(struct inode *inode, struct file *filp)
{
        return single_open(filp, p2m_dump_show, NULL);
}

static const struct file_operations p2m_dump_fops = {
        .open           = p2m_dump_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static struct dentry *d_mmu_debug;

static int __init xen_p2m_debugfs(void)
{
        struct dentry *d_xen = xen_init_debugfs();

        if (d_xen == NULL)
                return -ENOMEM;

        d_mmu_debug = debugfs_create_dir("mmu", d_xen);

        debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
        return 0;
}
fs_initcall(xen_p2m_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */