/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

static int swiotlb __read_mostly;

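/*
 * Choose the pgprot for a DMA buffer mapping: non-coherent devices (and
 * callers requesting DMA_ATTR_WRITE_COMBINE) get a write-combining
 * mapping, while coherent devices keep the cacheable protection passed in.
 */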
static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);

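/*
 * Helpers for the "atomic pool": a chunk of memory set aside at boot by
 * atomic_pool_init() below, from which non-blocking allocations for
 * non-coherent devices are served, since nothing can be remapped in
 * atomic context.
 */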
static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
		memset(ptr, 0, size);
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

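/*
 * Back-end allocator: returns cacheable, physically contiguous memory,
 * taken from CMA when available and the caller may block, otherwise from
 * swiotlb_alloc_coherent(). Non-cacheable aliases for non-coherent devices
 * are created by the callers (see __dma_alloc() below).
 */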
static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  unsigned long attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
		struct page *page;
		void *addr;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);
		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}

static void __dma_free_coherent(struct device *dev, size_t size,
				void *vaddr, dma_addr_t dma_handle,
				unsigned long attrs)
{
	bool freed;
	phys_addr_t paddr = dma_to_phys(dev, dma_handle);

	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return;
	}

	freed = dma_release_from_contiguous(dev,
					phys_to_page(paddr),
					size >> PAGE_SHIFT);
	if (!freed)
		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

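/*
 * The .alloc/.free hooks of swiotlb_dma_ops: coherent devices get the
 * cacheable buffer from __dma_alloc_coherent() directly; non-coherent
 * devices get either an atomic-pool buffer (when the caller cannot block)
 * or a non-cacheable remapping created with dma_common_contiguous_remap().
 */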
static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t flags,
			 unsigned long attrs)
{
	struct page *page;
	void *ptr, *coherent_ptr;
	bool coherent = is_device_dma_coherent(dev);
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

	size = PAGE_ALIGN(size);

	if (!coherent && !gfpflags_allow_blocking(flags)) {
		struct page *page = NULL;
		void *addr = __alloc_from_pool(size, &page, flags);

		if (addr)
			*dma_handle = phys_to_dma(dev, page_to_phys(page));

		return addr;
	}

	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
	if (!ptr)
		goto no_mem;

	/* no need for non-cacheable mapping if coherent */
	if (coherent)
		return ptr;

	/* remove any dirty cache lines on the kernel alias */
	__dma_flush_range(ptr, ptr + size);

	/* create a coherent mapping */
	page = virt_to_page(ptr);
	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot, NULL);
	if (!coherent_ptr)
		goto no_map;

	return coherent_ptr;

no_map:
	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
	*dma_handle = DMA_ERROR_CODE;
	return NULL;
}

static void __dma_free(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle,
		       unsigned long attrs)
{
	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

	size = PAGE_ALIGN(size);

	if (!is_device_dma_coherent(dev)) {
		if (__free_from_pool(vaddr, size))
			return;
		vunmap(vaddr);
	}
	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}

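/*
 * Streaming DMA operations: wrap the generic swiotlb helpers and add the
 * cache maintenance required by non-coherent devices around each transfer.
 */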
static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	dma_addr_t dev_addr;

	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

	return dev_addr;
}


static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				  int nelems, enum dma_data_direction dir,
				  unsigned long attrs)
{
	struct scatterlist *sg;
	int i, ret;

	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, ret, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);

	return ret;
}

static void __swiotlb_unmap_sg_attrs(struct device *dev,
				     struct scatterlist *sgl, int nelems,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dev_addr, size_t size,
					  enum dma_data_direction dir)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
					     dma_addr_t dev_addr, size_t size,
					     enum dma_data_direction dir)
{
	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sgl, int nelems,
				      enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sgl, int nelems,
					 enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);
}

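/*
 * Userspace mmap()/scatterlist export for buffers from __dma_alloc(); the
 * vma protection is derived through __get_dma_pgprot() so the user mapping
 * attributes match the device's coherence.
 */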
static int __swiotlb_mmap(struct device *dev,
			  struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  unsigned long attrs)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
					PAGE_SHIFT;
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t handle, size_t size,
				 unsigned long attrs)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
			    PAGE_ALIGN(size), 0);

	return ret;
}

static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	if (swiotlb)
		return swiotlb_dma_supported(hwdev, mask);
	return 1;
}

static struct dma_map_ops swiotlb_dma_ops = {
	.alloc = __dma_alloc,
	.free = __dma_free,
	.mmap = __swiotlb_mmap,
	.get_sgtable = __swiotlb_get_sgtable,
	.map_page = __swiotlb_map_page,
	.unmap_page = __swiotlb_unmap_page,
	.map_sg = __swiotlb_map_sg_attrs,
	.unmap_sg = __swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
	.sync_single_for_device = __swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
	.dma_supported = __swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};

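/*
 * Carve out the atomic pool at arch_initcall time: allocate a physically
 * contiguous chunk (from CMA if configured, otherwise GFP_DMA pages),
 * flush it, remap it non-cacheable and hand it to a gen_pool for
 * __alloc_from_pool() to carve up later.
 */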
static int __init atomic_pool_init(void)
{
	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
	struct page *page;
	void *addr;
	unsigned int pool_size_order = get_order(atomic_pool_size);

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order);
	else
		page = alloc_pages(GFP_DMA, pool_size_order);

	if (page) {
		int ret;
		void *page_addr = page_address(page);

		memset(page_addr, 0, atomic_pool_size);
		__dma_flush_range(page_addr, page_addr + atomic_pool_size);

		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
		if (!atomic_pool)
			goto free_page;

		addr = dma_common_contiguous_remap(page, atomic_pool_size,
					VM_USERMAP, prot, atomic_pool_init);

		if (!addr)
			goto destroy_genpool;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto remove_mapping;

		gen_pool_set_algo(atomic_pool,
				  gen_pool_first_fit_order_align,
				  (void *)PAGE_SHIFT);

		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}
	goto out;

remove_mapping:
	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
free_page:
	if (!dma_release_from_contiguous(NULL, page, nr_pages))
		__free_pages(page, pool_size_order);
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}

/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   unsigned long attrs)
{
	return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 unsigned long attrs)
{
}

static int __dummy_mmap(struct device *dev,
			struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			unsigned long attrs)
{
	return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir,
			  unsigned long attrs)
{
	return 0;
}

static void __dummy_unmap_sg(struct device *dev,
			     struct scatterlist *sgl, int nelems,
			     enum dma_data_direction dir,
			     unsigned long attrs)
{
}

static void __dummy_sync_single(struct device *dev,
				dma_addr_t dev_addr, size_t size,
				enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
			    struct scatterlist *sgl, int nelems,
			    enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
	return 0;
}

struct dma_map_ops dummy_dma_ops = {
	.alloc = __dummy_alloc,
	.free = __dummy_free,
	.mmap = __dummy_mmap,
	.map_page = __dummy_map_page,
	.unmap_page = __dummy_unmap_page,
	.map_sg = __dummy_map_sg,
	.unmap_sg = __dummy_unmap_sg,
	.sync_single_for_cpu = __dummy_sync_single,
	.sync_single_for_device = __dummy_sync_single,
	.sync_sg_for_cpu = __dummy_sync_sg,
	.sync_sg_for_device = __dummy_sync_sg,
	.mapping_error = __dummy_mapping_error,
	.dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);

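/*
 * Note that the swiotlb flag only affects __swiotlb_dma_supported(): it is
 * set when bounce buffering could genuinely be needed, i.e. swiotlb=force
 * was given on the command line or RAM extends above arm64_dma_phys_limit.
 */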
static int __init arm64_dma_init(void)
{
	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
		swiotlb = 1;

	return atomic_pool_init();
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);


#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_range(virt, virt + PAGE_SIZE);
}

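/*
 * IOMMU-backed coherent allocation: blocking callers get pages from
 * iommu_dma_alloc() remapped into a contiguous kernel virtual region, while
 * atomic callers fall back to a single physically contiguous buffer
 * (lowmem for coherent devices, the atomic pool otherwise).
 */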
static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp)) {
		struct page **pages;
		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
					handle, flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	} else {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = __alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (iommu_dma_mapping_error(dev, *handle)) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				__free_from_pool(addr, size);
			addr = NULL;
		}
	}
	return addr;
}

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, unsigned long attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 3 things depending on how it was allocated:
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (__in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_from_pool(cpu_addr, size);
	} else if (is_vmalloc_addr(cpu_addr)){
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      unsigned long attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_map_area(phys_to_virt(phys), size, dir);
}

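/*
 * Streaming DMA through the IOMMU: the map/unmap hooks establish and tear
 * down the IOVA translation via dma-iommu, with the sync helpers above
 * providing cache maintenance for non-coherent devices unless
 * DMA_ATTR_SKIP_CPU_SYNC is set.
 */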
static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int prot = dma_direction_to_prot(dir, coherent);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!iommu_dma_mapping_error(dev, dev_addr) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_device(dev, dev_addr, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_map_area(sg_virt(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_direction_to_prot(dir, coherent));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.dma_supported = iommu_dma_supported,
	.mapping_error = iommu_dma_mapping_error,
};

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */
struct iommu_dma_notifier_data {
	struct list_head list;
	struct device *dev;
	const struct iommu_ops *ops;
	u64 dma_base;
	u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);

static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			   u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/*
	 * If the IOMMU driver has the DMA domain support that we require,
	 * then the IOMMU core will have already configured a group for this
	 * device, and allocated the default domain for that group.
	 */
	if (!domain || iommu_dma_init_domain(domain, dma_base, size)) {
		pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
			dev_name(dev));
		return false;
	}

	dev->archdata.dma_ops = &iommu_dma_ops;
	return true;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			      u64 dma_base, u64 size)
{
	struct iommu_dma_notifier_data *iommudata;

	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
	if (!iommudata)
		return;

	iommudata->dev = dev;
	iommudata->ops = ops;
	iommudata->dma_base = dma_base;
	iommudata->size = size;

	mutex_lock(&iommu_dma_notifier_lock);
	list_add(&iommudata->list, &iommu_dma_masters);
	mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct iommu_dma_notifier_data *master, *tmp;

	if (action != BUS_NOTIFY_BIND_DRIVER)
		return 0;

	mutex_lock(&iommu_dma_notifier_lock);
	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
		if (data == master->dev && do_iommu_attach(master->dev,
				master->ops, master->dma_base, master->size)) {
			list_del(&master->list);
			kfree(master);
			break;
		}
	}
	mutex_unlock(&iommu_dma_notifier_lock);
	return 0;
}

static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
{
	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
	int ret;

	if (!nb)
		return -ENOMEM;

	nb->notifier_call = __iommu_attach_notifier;

	ret = bus_register_notifier(bus, nb);
	if (ret) {
		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
			bus->name);
		kfree(nb);
	}
	return ret;
}

static int __init __iommu_dma_init(void)
{
	int ret;

	ret = iommu_dma_init();
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&amba_bustype);
#ifdef CONFIG_PCI
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
#endif
	return ret;
}
arch_initcall(__iommu_dma_init);

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_group *group;

	if (!ops)
		return;
	/*
	 * TODO: As a concession to the future, we're ready to handle being
	 * called both early and late (i.e. after bus_add_device). Once all
	 * the platform bus code is reworked to call us late and the notifier
	 * junk above goes away, move the body of do_iommu_attach here.
	 */
	group = iommu_group_get(dev);
	if (group) {
		do_iommu_attach(dev, ops, dma_base, size);
		iommu_group_put(group);
	} else {
		queue_iommu_attach(dev, ops, dma_base, size);
	}
}

void arch_teardown_dma_ops(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (WARN_ON(domain))
		iommu_detach_device(domain, dev);

	dev->archdata.dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *iommu)
{ }

#endif	/* CONFIG_IOMMU_DMA */

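/*
 * Entry point for per-device DMA configuration: install the swiotlb-based
 * ops as the default (unless something already set ops for this device),
 * record the device's coherence, and let __iommu_setup_dma_ops() switch
 * over to the IOMMU ops where an IOMMU is present and usable.
 */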
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	if (!dev->archdata.dma_ops)
		dev->archdata.dma_ops = &swiotlb_dma_ops;

	dev->archdata.dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}