/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

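/*
 * Choose the page protection for a DMA buffer mapping: non-coherent
 * devices (and DMA_ATTR_WRITE_COMBINE requests) get a write-combining,
 * non-cacheable mapping, while fully coherent devices keep the cacheable
 * protection passed in.
 */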
static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
		return pgprot_writecombine(prot);
	return prot;
}

static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);

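/*
 * Allocate from the pre-mapped, non-cacheable atomic pool.  This path is
 * used when the caller cannot sleep, since remapping page tables is not
 * possible in atomic context.  The pool size can be tuned with the
 * "coherent_pool=" boot parameter.
 */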
static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
		memset(ptr, 0, size);
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

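/*
 * Low-level buffer allocation: prefer CMA when a contiguous area is
 * available and the caller may sleep, otherwise fall back to the swiotlb
 * coherent allocator.  The returned memory is zeroed.
 */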
static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  struct dma_attrs *attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
		struct page *page;
		void *addr;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);
		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}

static void __dma_free_coherent(struct device *dev, size_t size,
				void *vaddr, dma_addr_t dma_handle,
				struct dma_attrs *attrs)
{
	bool freed;
	phys_addr_t paddr = dma_to_phys(dev, dma_handle);

	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return;
	}

	freed = dma_release_from_contiguous(dev,
					    phys_to_page(paddr),
					    size >> PAGE_SHIFT);
	if (!freed)
		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

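/*
 * "alloc" hook of the swiotlb dma_map_ops.  Coherent devices get the
 * cacheable linear-map address directly.  For non-coherent devices the
 * buffer is either carved from the atomic pool (atomic callers) or
 * remapped with a non-cacheable attribute after flushing the kernel
 * alias, so the CPU and the device never see stale cache lines.
 */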
static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t flags,
			 struct dma_attrs *attrs)
{
	struct page *page;
	void *ptr, *coherent_ptr;
	bool coherent = is_device_dma_coherent(dev);
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

	size = PAGE_ALIGN(size);

	if (!coherent && !(flags & __GFP_WAIT)) {
		struct page *page = NULL;
		void *addr = __alloc_from_pool(size, &page, flags);

		if (addr)
			*dma_handle = phys_to_dma(dev, page_to_phys(page));

		return addr;
	}

	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
	if (!ptr)
		goto no_mem;

	/* no need for non-cacheable mapping if coherent */
	if (coherent)
		return ptr;

	/* remove any dirty cache lines on the kernel alias */
	__dma_flush_range(ptr, ptr + size);

	/* create a coherent mapping */
	page = virt_to_page(ptr);
	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot, NULL);
	if (!coherent_ptr)
		goto no_map;

	return coherent_ptr;

no_map:
	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
	*dma_handle = DMA_ERROR_CODE;
	return NULL;
}

static void __dma_free(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle,
		       struct dma_attrs *attrs)
{
	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

	size = PAGE_ALIGN(size);

	if (!is_device_dma_coherent(dev)) {
		if (__free_from_pool(vaddr, size))
			return;
		vunmap(vaddr);
	}
	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}

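/*
 * Streaming DMA wrappers around the swiotlb helpers.  For non-coherent
 * devices, __dma_map_area() performs the cache maintenance needed before
 * the device accesses the buffer and __dma_unmap_area() the maintenance
 * needed before the CPU reads it back; coherent devices skip both.
 */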
static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     struct dma_attrs *attrs)
{
	dma_addr_t dev_addr;

	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

	return dev_addr;
}


static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
				 size_t size, enum dma_data_direction dir,
				 struct dma_attrs *attrs)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				  int nelems, enum dma_data_direction dir,
				  struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i, ret;

	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, ret, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);

	return ret;
}

static void __swiotlb_unmap_sg_attrs(struct device *dev,
				     struct scatterlist *sgl, int nelems,
				     enum dma_data_direction dir,
				     struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dev_addr, size_t size,
					  enum dma_data_direction dir)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
					     dma_addr_t dev_addr, size_t size,
					     enum dma_data_direction dir)
{
	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sgl, int nelems,
				      enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sgl, int nelems,
					 enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);
}

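/*
 * Export a coherent buffer to userspace (mmap) or as a scatterlist
 * (get_sgtable).  The vma protection mirrors the kernel mapping:
 * write-combine for non-coherent devices, cacheable otherwise.
 */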
static int __swiotlb_mmap(struct device *dev,
			  struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  struct dma_attrs *attrs)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
					PAGE_SHIFT;
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t handle, size_t size,
				 struct dma_attrs *attrs)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
			    PAGE_ALIGN(size), 0);

	return ret;
}

static struct dma_map_ops swiotlb_dma_ops = {
	.alloc = __dma_alloc,
	.free = __dma_free,
	.mmap = __swiotlb_mmap,
	.get_sgtable = __swiotlb_get_sgtable,
	.map_page = __swiotlb_map_page,
	.unmap_page = __swiotlb_unmap_page,
	.map_sg = __swiotlb_map_sg_attrs,
	.unmap_sg = __swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
	.sync_single_for_device = __swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
	.dma_supported = swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};

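/*
 * These ops are not called directly by drivers; they are reached through
 * the generic DMA API.  A rough sketch of a driver sequence that lands in
 * the hooks above (local variable names are illustrative only):
 *
 *	buf = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
 *		-> __dma_alloc()
 *	addr = dma_map_single(dev, ptr, len, DMA_TO_DEVICE);
 *		-> __swiotlb_map_page()
 *	dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
 *		-> __swiotlb_unmap_page()
 *	dma_free_coherent(dev, size, buf, dma_handle);
 *		-> __dma_free()
 */

/*
 * Set up the atomic DMA pool at arch_initcall time: the memory comes from
 * CMA when available (otherwise ZONE_DMA pages), is flushed and remapped
 * non-cacheable, and is then handed to a genpool so that
 * __alloc_from_pool() can serve atomic allocations.
 */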
static int __init atomic_pool_init(void)
{
	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
	struct page *page;
	void *addr;
	unsigned int pool_size_order = get_order(atomic_pool_size);

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order);
	else
		page = alloc_pages(GFP_DMA, pool_size_order);

	if (page) {
		int ret;
		void *page_addr = page_address(page);

		memset(page_addr, 0, atomic_pool_size);
		__dma_flush_range(page_addr, page_addr + atomic_pool_size);

		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
		if (!atomic_pool)
			goto free_page;

		addr = dma_common_contiguous_remap(page, atomic_pool_size,
					VM_USERMAP, prot, atomic_pool_init);

		if (!addr)
			goto destroy_genpool;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto remove_mapping;

		gen_pool_set_algo(atomic_pool,
				  gen_pool_first_fit_order_align,
				  (void *)PAGE_SHIFT);

		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}
	goto out;

remove_mapping:
	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
free_page:
	if (!dma_release_from_contiguous(NULL, page, nr_pages))
		__free_pages(page, pool_size_order);
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}

/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   struct dma_attrs *attrs)
{
	return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 struct dma_attrs *attrs)
{
}

static int __dummy_mmap(struct device *dev,
			struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			struct dma_attrs *attrs)
{
	return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   struct dma_attrs *attrs)
{
	return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       struct dma_attrs *attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir,
			  struct dma_attrs *attrs)
{
	return 0;
}

static void __dummy_unmap_sg(struct device *dev,
			     struct scatterlist *sgl, int nelems,
			     enum dma_data_direction dir,
			     struct dma_attrs *attrs)
{
}

static void __dummy_sync_single(struct device *dev,
				dma_addr_t dev_addr, size_t size,
				enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
			    struct scatterlist *sgl, int nelems,
			    enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
	return 0;
}

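/*
 * Every operation above either fails or is a no-op, so these ops are
 * suitable for devices that are described as unable to perform DMA
 * (for example when firmware provides no usable DMA configuration).
 */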
struct dma_map_ops dummy_dma_ops = {
	.alloc = __dummy_alloc,
	.free = __dummy_free,
	.mmap = __dummy_mmap,
	.map_page = __dummy_map_page,
	.unmap_page = __dummy_unmap_page,
	.map_sg = __dummy_map_sg,
	.unmap_sg = __dummy_unmap_sg,
	.sync_single_for_cpu = __dummy_sync_single,
	.sync_single_for_device = __dummy_sync_single,
	.sync_sg_for_cpu = __dummy_sync_sg,
	.sync_sg_for_device = __dummy_sync_sg,
	.mapping_error = __dummy_mapping_error,
	.dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);

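/* Install the swiotlb-backed ops as the default and set up the atomic pool. */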
static int __init arm64_dma_init(void)
{
	int ret;

	dma_ops = &swiotlb_dma_ops;

	ret = atomic_pool_init();

	return ret;
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_range(virt, virt + PAGE_SIZE);
}

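/*
 * IOMMU-backed allocation.  Callers that may sleep get a buffer built
 * from individual pages by iommu_dma_alloc() and remapped into a
 * contiguous kernel VA; atomic callers fall back to a single physically
 * contiguous allocation (from the atomic pool for non-coherent devices)
 * mapped with iommu_dma_map_page().
 */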
static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 struct dma_attrs *attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;
	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (gfp & __GFP_WAIT) {
		struct page **pages;
		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

		pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle,
					flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, size, handle);
	} else {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = __alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, size, ioprot);
		if (iommu_dma_mapping_error(dev, *handle)) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				__free_from_pool(addr, size);
			addr = NULL;
		}
	}
	return addr;
}

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, struct dma_attrs *attrs)
{
	/*
	 * @cpu_addr will be one of 3 things depending on how it was allocated:
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (__in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
		__free_from_pool(cpu_addr, size);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, size, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      struct dma_attrs *attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, struct dma_attrs *attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

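/*
 * Cache maintenance for the IOMMU streaming API: the IOVA is translated
 * back to a physical address with iommu_iova_to_phys() so the kernel
 * alias can be cleaned or invalidated; coherent devices need none of this.
 */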
static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_map_area(phys_to_virt(phys), size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   struct dma_attrs *attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int prot = dma_direction_to_prot(dir, coherent);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!iommu_dma_mapping_error(dev, dev_addr) &&
	    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_single_for_device(dev, dev_addr, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       struct dma_attrs *attrs)
{
	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_map_area(sg_virt(sg), sg->length, dir);
}

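/*
 * For scatterlists, cache maintenance is performed on the CPU addresses
 * (sg_virt()) rather than on DMA addresses, immediately before the IOVA
 * mapping is created or torn down.
 */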
static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				struct dma_attrs *attrs)
{
	bool coherent = is_device_dma_coherent(dev);

	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_direction_to_prot(dir, coherent));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   struct dma_attrs *attrs)
{
	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.dma_supported = iommu_dma_supported,
	.mapping_error = iommu_dma_mapping_error,
};

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */
struct iommu_dma_notifier_data {
	struct list_head list;
	struct device *dev;
	const struct iommu_ops *ops;
	u64 dma_base;
	u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);

/*
 * Temporarily "borrow" a domain feature flag to tell if we had to resort
 * to creating our own domain here, in case we need to clean it up again.
 */
#define __IOMMU_DOMAIN_FAKE_DEFAULT		(1U << 31)

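/*
 * Attach @dev to an IOMMU DMA domain (creating one if the core hasn't
 * provided a default) and switch the device over to iommu_dma_ops.
 * Returns false if the device must keep using the platform swiotlb ops.
 */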
static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			    u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/*
	 * Best case: The device is either part of a group which was
	 * already attached to a domain in a previous call, or it's
	 * been put in a default DMA domain by the IOMMU core.
	 */
	if (!domain) {
		/*
		 * Urgh. The IOMMU core isn't going to do default domains
		 * for non-PCI devices anyway, until it has some means of
		 * abstracting the entirely implementation-specific
		 * sideband data/SoC topology/unicorn dust that may or
		 * may not differentiate upstream masters.
		 * So until then, HORRIBLE HACKS!
		 */
		domain = ops->domain_alloc(IOMMU_DOMAIN_DMA);
		if (!domain)
			goto out_no_domain;

		domain->ops = ops;
		domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT;

		if (iommu_attach_device(domain, dev))
			goto out_put_domain;
	}

	if (iommu_dma_init_domain(domain, dma_base, size))
		goto out_detach;

	dev->archdata.dma_ops = &iommu_dma_ops;
	return true;

out_detach:
	iommu_detach_device(domain, dev);
out_put_domain:
	if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
		iommu_domain_free(domain);
out_no_domain:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
	return false;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			       u64 dma_base, u64 size)
{
	struct iommu_dma_notifier_data *iommudata;

	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
	if (!iommudata)
		return;

	iommudata->dev = dev;
	iommudata->ops = ops;
	iommudata->dma_base = dma_base;
	iommudata->size = size;

	mutex_lock(&iommu_dma_notifier_lock);
	list_add(&iommudata->list, &iommu_dma_masters);
	mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct iommu_dma_notifier_data *master, *tmp;

	if (action != BUS_NOTIFY_ADD_DEVICE)
		return 0;

	mutex_lock(&iommu_dma_notifier_lock);
	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
		if (do_iommu_attach(master->dev, master->ops,
				master->dma_base, master->size)) {
			list_del(&master->list);
			kfree(master);
		}
	}
	mutex_unlock(&iommu_dma_notifier_lock);
	return 0;
}

static int register_iommu_dma_ops_notifier(struct bus_type *bus)
{
	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
	int ret;

	if (!nb)
		return -ENOMEM;
	/*
	 * The device must be attached to a domain before the driver probe
	 * routine gets a chance to start allocating DMA buffers. However,
	 * the IOMMU driver also needs a chance to configure the iommu_group
	 * via its add_device callback first, so we need to make the attach
	 * happen between those two points. Since the IOMMU core uses a bus
	 * notifier with default priority for add_device, do the same but
	 * with a lower priority to ensure the appropriate ordering.
	 */
	nb->notifier_call = __iommu_attach_notifier;
	nb->priority = -100;

	ret = bus_register_notifier(bus, nb);
	if (ret) {
		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
			bus->name);
		kfree(nb);
	}
	return ret;
}

static int __init __iommu_dma_init(void)
{
	int ret;

	ret = iommu_dma_init();
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&amba_bustype);
	return ret;
}
arch_initcall(__iommu_dma_init);

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_group *group;

	if (!ops)
		return;
	/*
	 * TODO: As a concession to the future, we're ready to handle being
	 * called both early and late (i.e. after bus_add_device). Once all
	 * the platform bus code is reworked to call us late and the notifier
	 * junk above goes away, move the body of do_iommu_attach here.
	 */
	group = iommu_group_get(dev);
	if (group) {
		do_iommu_attach(dev, ops, dma_base, size);
		iommu_group_put(group);
	} else {
		queue_iommu_attach(dev, ops, dma_base, size);
	}
}

void arch_teardown_dma_ops(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (domain) {
		iommu_detach_device(domain, dev);
		if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
			iommu_domain_free(domain);
	}

	dev->archdata.dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  struct iommu_ops *iommu)
{ }

#endif /* CONFIG_IOMMU_DMA */

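/*
 * Called by the bus code when a device is configured: record whether the
 * device is cache-coherent and, when an IOMMU is available (and
 * CONFIG_IOMMU_DMA is enabled), try to switch the device over to the
 * IOMMU-backed DMA ops.
 */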
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			struct iommu_ops *iommu, bool coherent)
{
	if (!acpi_disabled && !dev->archdata.dma_ops)
		dev->archdata.dma_ops = dma_ops;

	dev->archdata.dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}