/*
 * Copyright IBM Corp. 2006
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

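/*
 * Note: vmem_mutex serializes all updates to the 1:1 mapping and to the
 * mem_segs list below; each memory_segment describes one contiguous
 * range that is (or is about to be) part of the 1:1 mapping.
 */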
static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
	struct list_head list;
	unsigned long start;
	unsigned long size;
};

static LIST_HEAD(mem_segs);

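/*
 * Page tables may be needed both before the slab allocator is up (early
 * boot) and afterwards (memory hotplug), so the helpers below fall back
 * to the bootmem allocator when slab_is_available() is false.
 */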
static void __ref *vmem_alloc_pages(unsigned int order)
{
	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
}

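/*
 * On 64-bit s390 a pud (region-third) or pmd (segment) table holds 2048
 * eight-byte entries, i.e. 16KB, hence the order-2 (four page)
 * allocations and the PAGE_SIZE * 4 clear_table() lengths below.
 */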
static inline pud_t *vmem_pud_alloc(void)
{
	pud_t *pud = NULL;

#ifdef CONFIG_64BIT
	pud = vmem_alloc_pages(2);
	if (!pud)
		return NULL;
	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pud;
}

static inline pmd_t *vmem_pmd_alloc(void)
{
	pmd_t *pmd = NULL;

#ifdef CONFIG_64BIT
	pmd = vmem_alloc_pages(2);
	if (!pmd)
		return NULL;
	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pmd;
}

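/*
 * A pte table is taken from the regular kernel page table allocator
 * once the slab is up; freshly allocated tables are filled with empty
 * (invalid) entries so that stray accesses fault.
 */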
static pte_t __ref *vmem_pte_alloc(unsigned long address)
{
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm, address);
	else
		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
	if (!pte)
		return NULL;
	clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
		    PTRS_PER_PTE * sizeof(pte_t));
	return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
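/*
 * Where the hardware provides enhanced DAT and the range is suitably
 * aligned, the loop below maps whole regions (EDAT2, PUD_SIZE) or whole
 * segments (EDAT1, PMD_SIZE) with a single large entry instead of
 * building page tables; "ro" selects a read-only mapping.
 */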
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}
		pu_dir = pud_offset(pg_dir, address);
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
			pud_val(*pu_dir) = __pa(address) |
				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
				(ro ? _REGION_ENTRY_RO : 0);
			address += PUD_SIZE;
			continue;
		}
#endif
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}
		pm_dir = pmd_offset(pu_dir, address);
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
			pmd_val(*pm_dir) = __pa(address) |
				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
				(ro ? _SEGMENT_ENTRY_RO : 0);
			address += PMD_SIZE;
			continue;
		}
#endif
		if (pmd_none(*pm_dir)) {
			pt_dir = vmem_pte_alloc(address);
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0);
		address += PAGE_SIZE;
	}
	ret = 0;
out:
	flush_tlb_kernel_range(start, end);
	return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
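/*
 * Large mappings are taken down with a single pud_clear()/pmd_clear();
 * the page table pages themselves are never freed here.
 */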
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t pte;

	pte_val(pte) = _PAGE_TYPE_EMPTY;
	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			address += PGDIR_SIZE;
			continue;
		}
		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			address += PUD_SIZE;
			continue;
		}
		if (pud_large(*pu_dir)) {
			pud_clear(pu_dir);
			address += PUD_SIZE;
			continue;
		}
		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			address += PMD_SIZE;
			continue;
		}
		if (pmd_large(*pm_dir)) {
			pmd_clear(pm_dir);
			address += PMD_SIZE;
			continue;
		}
		pt_dir = pte_offset_kernel(pm_dir, address);
		*pt_dir = pte;
		address += PAGE_SIZE;
	}
	flush_tlb_kernel_range(start, end);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
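/*
 * The virtual mem_map is a virtually contiguous array of struct page;
 * this populates the mapping for the array slice describing the page
 * frames [start, start + nr) and zeroes the new struct pages.
 */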
int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
{
	unsigned long address, start_addr, end_addr;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	start_addr = (unsigned long) start;
	end_addr = (unsigned long) (start + nr);

	for (address = start_addr; address < end_addr;) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}

		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}

		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
#ifdef CONFIG_64BIT
			/* Use 1MB frames for vmemmap if available. We
			 * always use large frames, even if they are only
			 * partially used. Otherwise we would also end up
			 * with page tables, since vmemmap_populate gets
			 * called for each section separately. */
			if (MACHINE_HAS_EDAT1) {
				void *new_page;

				new_page = vmemmap_alloc_block(PMD_SIZE, node);
				if (!new_page)
					goto out;
				pmd_val(*pm_dir) = __pa(new_page) |
					_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE;
				address = (address + PMD_SIZE) & PMD_MASK;
				continue;
			}
#endif
			pt_dir = vmem_pte_alloc(address);
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		} else if (pmd_large(*pm_dir)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		if (pte_none(*pt_dir)) {
			unsigned long new_page;

			/* new_page already holds a physical address */
			new_page = __pa(vmem_alloc_pages(0));
			if (!new_page)
				goto out;
			pte_val(*pt_dir) = new_page;
		}
		address += PAGE_SIZE;
	}
	memset(start, 0, nr * sizeof(struct page));
	ret = 0;
out:
	flush_tlb_kernel_range(start_addr, end_addr);
	return ret;
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
	struct memory_segment *tmp;

	if (seg->start + seg->size > VMEM_MAX_PHYS ||
	    seg->start + seg->size < seg->start)
		return -ERANGE;

	list_for_each_entry(tmp, &mem_segs, list) {
		if (seg->start >= tmp->start + tmp->size)
			continue;
		if (seg->start + seg->size <= tmp->start)
			continue;
		return -ENOSPC;
	}
	list_add(&seg->list, &mem_segs);
	return 0;
}

/*
 * Remove memory segment from the segment list.
 */
static void remove_memory_segment(struct memory_segment *seg)
{
	list_del(&seg->list);
}

static void __remove_shared_memory(struct memory_segment *seg)
{
	remove_memory_segment(seg);
	vmem_remove_range(seg->start, seg->size);
}

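/*
 * External interface for memory hotplug: only a segment that matches a
 * previously added mapping exactly (same start and size) is removed.
 */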
int vmem_remove_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);

	ret = -ENOENT;
	list_for_each_entry(seg, &mem_segs, list) {
		if (seg->start == start && seg->size == size)
			break;
	}

	if (seg->start != start || seg->size != size)
		goto out;

	ret = 0;
	__remove_shared_memory(seg);
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

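/*
 * The segment is registered before any page tables are touched so that
 * overlapping requests are rejected early; if the mapping itself fails,
 * the registration is rolled back again.
 */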
int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);
	ret = -ENOMEM;
	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
	if (!seg)
		goto out;
	seg->start = start;
	seg->size = size;

	ret = insert_memory_segment(seg);
	if (ret)
		goto out_free;

	ret = vmem_add_mem(start, size, 0);
	if (ret)
		goto out_remove;
	goto out;

out_remove:
	__remove_shared_memory(seg);
out_free:
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for vmemmap so that
 * additional memory segments can be hotplugged later.
 */
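/*
 * Everything from _stext up to _eshared is mapped read-only; on s390
 * _eshared marks the end of the kernel sections that may be shared
 * read-only between guests. Chunks straddling a read-only boundary are
 * split into separate read-only and read-write mappings below.
 */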
void __init vmem_map_init(void)
{
	unsigned long ro_start, ro_end;
	unsigned long start, end;
	int i;

	ro_start = PFN_ALIGN((unsigned long)&_stext);
	ro_end = (unsigned long)&_eshared & PAGE_MASK;
	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
		if (memory_chunk[i].type == CHUNK_CRASHK ||
		    memory_chunk[i].type == CHUNK_OLDMEM)
			continue;
		start = memory_chunk[i].addr;
		end = memory_chunk[i].addr + memory_chunk[i].size;
		if (start >= ro_end || end <= ro_start)
			vmem_add_mem(start, end - start, 0);
		else if (start >= ro_start && end <= ro_end)
			vmem_add_mem(start, end - start, 1);
		else if (start >= ro_start) {
			vmem_add_mem(start, ro_end - start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		} else if (end < ro_end) {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, end - ro_start, 1);
		} else {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, ro_end - ro_start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		}
	}
}

/*
 * Convert memory chunk array to a memory segment list so there is a single
 * list that contains both r/w memory and shared memory segments.
 */
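/*
 * Crash kernel and kdump "old memory" chunks are skipped here, matching
 * vmem_map_init() above; they are presumably managed separately by the
 * kdump infrastructure and must not appear as regular segments.
 */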
static int __init vmem_convert_memory_chunk(void)
{
	struct memory_segment *seg;
	int i;

	mutex_lock(&vmem_mutex);
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		if (!memory_chunk[i].size)
			continue;
		if (memory_chunk[i].type == CHUNK_CRASHK ||
		    memory_chunk[i].type == CHUNK_OLDMEM)
			continue;
		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
		if (!seg)
			panic("Out of memory...\n");
		seg->start = memory_chunk[i].addr;
		seg->size = memory_chunk[i].size;
		insert_memory_segment(seg);
	}
	mutex_unlock(&vmem_mutex);
	return 0;
}

core_initcall(vmem_convert_memory_chunk);