x86/amd-iommu: Use get_device_id and check_device where appropriate
[deliverable/linux.git] / arch / x86 / kernel / amd_iommu.c
b6c02715 1/*
bf3118c1 2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
b6c02715
JR
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/gfp.h>
22#include <linux/bitops.h>
7f26508b 23#include <linux/debugfs.h>
b6c02715 24#include <linux/scatterlist.h>
51491367 25#include <linux/dma-mapping.h>
b6c02715 26#include <linux/iommu-helper.h>
c156e347 27#include <linux/iommu.h>
b6c02715 28#include <asm/proto.h>
46a7fa27 29#include <asm/iommu.h>
1d9b16d1 30#include <asm/gart.h>
6a9401a7 31#include <asm/amd_iommu_proto.h>
b6c02715 32#include <asm/amd_iommu_types.h>
c6da992e 33#include <asm/amd_iommu.h>
b6c02715
JR
34
35#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
36
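/*
 * Illustrative note (not part of the original source): CMD_SET_TYPE() places
 * the command opcode in bits 31:28 of the second command word. Building a
 * COMPLETION_WAIT command, as done later in __iommu_completion_wait(),
 * looks roughly like this:
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
 *	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
 */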
136f78a1
JR
37#define EXIT_LOOP_COUNT 10000000
38
b6c02715
JR
39static DEFINE_RWLOCK(amd_iommu_devtable_lock);
40
bd60b735
JR
41/* A list of preallocated protection domains */
42static LIST_HEAD(iommu_pd_list);
43static DEFINE_SPINLOCK(iommu_pd_list_lock);
44
0feae533
JR
45/*
46 * Domain for untranslated devices - only allocated
47 * if iommu=pt passed on kernel cmd line.
48 */
49static struct protection_domain *pt_domain;
50
26961efe 51static struct iommu_ops amd_iommu_ops;
26961efe 52
431b2a20
JR
53/*
54 * general struct to manage commands sent to an IOMMU
55 */
d6449536 56struct iommu_cmd {
b6c02715
JR
57 u32 data[4];
58};
59
bd0e5211
JR
60static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
61 struct unity_map_entry *e);
8bc3e127 62static u64 *alloc_pte(struct protection_domain *domain,
abdc5eb3
JR
63 unsigned long address, int end_lvl,
64 u64 **pte_page, gfp_t gfp);
00cd122a
JR
65static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
66 unsigned long start_page,
67 unsigned int pages);
a345b23b 68static void reset_iommu_command_buffer(struct amd_iommu *iommu);
9355a081 69static u64 *fetch_pte(struct protection_domain *domain,
a6b256b4 70 unsigned long address, int map_size);
04bfdd84 71static void update_domain(struct protection_domain *domain);
c1eee67b 72
15898bbc
JR
73/****************************************************************************
74 *
75 * Helper functions
76 *
77 ****************************************************************************/
78
79static inline u16 get_device_id(struct device *dev)
80{
81 struct pci_dev *pdev = to_pci_dev(dev);
82
83 return calc_devid(pdev->bus->number, pdev->devfn);
84}
85
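/*
 * Illustrative sketch only: calc_devid() is assumed to pack the PCI bus
 * number and devfn into the 16-bit requestor id that indexes the device
 * table and the rlookup table, roughly equivalent to:
 *
 *	devid = ((u16)pdev->bus->number << 8) | pdev->devfn;
 */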
71c70984
JR
86/*
87 * In this function the list of preallocated protection domains is traversed to
88 * find the domain for a specific device
89 */
90static struct dma_ops_domain *find_protection_domain(u16 devid)
91{
92 struct dma_ops_domain *entry, *ret = NULL;
93 unsigned long flags;
94 u16 alias = amd_iommu_alias_table[devid];
95
96 if (list_empty(&iommu_pd_list))
97 return NULL;
98
99 spin_lock_irqsave(&iommu_pd_list_lock, flags);
100
101 list_for_each_entry(entry, &iommu_pd_list, list) {
102 if (entry->target_dev == devid ||
103 entry->target_dev == alias) {
104 ret = entry;
105 break;
106 }
107 }
108
109 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
110
111 return ret;
112}
113
98fc5a69
JR
114/*
115 * This function checks if the driver got a valid device from the caller to
116 * avoid dereferencing invalid pointers.
117 */
118static bool check_device(struct device *dev)
119{
120 u16 devid;
121
122 if (!dev || !dev->dma_mask)
123 return false;
124
 125 /* We only handle PCI devices */
 126 if (dev->bus != &pci_bus_type)
127 return false;
128
129 devid = get_device_id(dev);
130
131 /* Out of our scope? */
132 if (devid > amd_iommu_last_bdf)
133 return false;
134
135 if (amd_iommu_rlookup_table[devid] == NULL)
136 return false;
137
138 return true;
139}
140
7f26508b
JR
141#ifdef CONFIG_AMD_IOMMU_STATS
142
143/*
144 * Initialization code for statistics collection
145 */
146
da49f6df 147DECLARE_STATS_COUNTER(compl_wait);
0f2a86f2 148DECLARE_STATS_COUNTER(cnt_map_single);
146a6917 149DECLARE_STATS_COUNTER(cnt_unmap_single);
d03f067a 150DECLARE_STATS_COUNTER(cnt_map_sg);
55877a6b 151DECLARE_STATS_COUNTER(cnt_unmap_sg);
c8f0fb36 152DECLARE_STATS_COUNTER(cnt_alloc_coherent);
5d31ee7e 153DECLARE_STATS_COUNTER(cnt_free_coherent);
c1858976 154DECLARE_STATS_COUNTER(cross_page);
f57d98ae 155DECLARE_STATS_COUNTER(domain_flush_single);
18811f55 156DECLARE_STATS_COUNTER(domain_flush_all);
5774f7c5 157DECLARE_STATS_COUNTER(alloced_io_mem);
8ecaf8f1 158DECLARE_STATS_COUNTER(total_map_requests);
da49f6df 159
7f26508b
JR
160static struct dentry *stats_dir;
161static struct dentry *de_isolate;
162static struct dentry *de_fflush;
163
164static void amd_iommu_stats_add(struct __iommu_counter *cnt)
165{
166 if (stats_dir == NULL)
167 return;
168
169 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
170 &cnt->value);
171}
172
173static void amd_iommu_stats_init(void)
174{
175 stats_dir = debugfs_create_dir("amd-iommu", NULL);
176 if (stats_dir == NULL)
177 return;
178
179 de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
180 (u32 *)&amd_iommu_isolate);
181
182 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
183 (u32 *)&amd_iommu_unmap_flush);
da49f6df
JR
184
185 amd_iommu_stats_add(&compl_wait);
0f2a86f2 186 amd_iommu_stats_add(&cnt_map_single);
146a6917 187 amd_iommu_stats_add(&cnt_unmap_single);
d03f067a 188 amd_iommu_stats_add(&cnt_map_sg);
55877a6b 189 amd_iommu_stats_add(&cnt_unmap_sg);
c8f0fb36 190 amd_iommu_stats_add(&cnt_alloc_coherent);
5d31ee7e 191 amd_iommu_stats_add(&cnt_free_coherent);
c1858976 192 amd_iommu_stats_add(&cross_page);
f57d98ae 193 amd_iommu_stats_add(&domain_flush_single);
18811f55 194 amd_iommu_stats_add(&domain_flush_all);
5774f7c5 195 amd_iommu_stats_add(&alloced_io_mem);
8ecaf8f1 196 amd_iommu_stats_add(&total_map_requests);
7f26508b
JR
197}
198
199#endif
200
a80dc3e0
JR
201/****************************************************************************
202 *
203 * Interrupt handling functions
204 *
205 ****************************************************************************/
206
e3e59876
JR
207static void dump_dte_entry(u16 devid)
208{
209 int i;
210
211 for (i = 0; i < 8; ++i)
212 pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
213 amd_iommu_dev_table[devid].data[i]);
214}
215
945b4ac4
JR
216static void dump_command(unsigned long phys_addr)
217{
218 struct iommu_cmd *cmd = phys_to_virt(phys_addr);
219 int i;
220
221 for (i = 0; i < 4; ++i)
222 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
223}
224
a345b23b 225static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
90008ee4
JR
226{
227 u32 *event = __evt;
228 int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
229 int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
230 int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
231 int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
232 u64 address = (u64)(((u64)event[3]) << 32) | event[2];
233
4c6f40d4 234 printk(KERN_ERR "AMD-Vi: Event logged [");
90008ee4
JR
235
236 switch (type) {
237 case EVENT_TYPE_ILL_DEV:
238 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
239 "address=0x%016llx flags=0x%04x]\n",
240 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
241 address, flags);
e3e59876 242 dump_dte_entry(devid);
90008ee4
JR
243 break;
244 case EVENT_TYPE_IO_FAULT:
245 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
246 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
247 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
248 domid, address, flags);
249 break;
250 case EVENT_TYPE_DEV_TAB_ERR:
251 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
252 "address=0x%016llx flags=0x%04x]\n",
253 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
254 address, flags);
255 break;
256 case EVENT_TYPE_PAGE_TAB_ERR:
257 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
258 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
259 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
260 domid, address, flags);
261 break;
262 case EVENT_TYPE_ILL_CMD:
263 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
a345b23b 264 reset_iommu_command_buffer(iommu);
945b4ac4 265 dump_command(address);
90008ee4
JR
266 break;
267 case EVENT_TYPE_CMD_HARD_ERR:
268 printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
269 "flags=0x%04x]\n", address, flags);
270 break;
271 case EVENT_TYPE_IOTLB_INV_TO:
272 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
273 "address=0x%016llx]\n",
274 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
275 address);
276 break;
277 case EVENT_TYPE_INV_DEV_REQ:
278 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
279 "address=0x%016llx flags=0x%04x]\n",
280 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
281 address, flags);
282 break;
283 default:
284 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
285 }
286}
287
288static void iommu_poll_events(struct amd_iommu *iommu)
289{
290 u32 head, tail;
291 unsigned long flags;
292
293 spin_lock_irqsave(&iommu->lock, flags);
294
295 head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
296 tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
297
298 while (head != tail) {
a345b23b 299 iommu_print_event(iommu, iommu->evt_buf + head);
90008ee4
JR
300 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
301 }
302
303 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
304
305 spin_unlock_irqrestore(&iommu->lock, flags);
306}
307
a80dc3e0
JR
308irqreturn_t amd_iommu_int_handler(int irq, void *data)
309{
90008ee4
JR
310 struct amd_iommu *iommu;
311
3bd22172 312 for_each_iommu(iommu)
90008ee4
JR
313 iommu_poll_events(iommu);
314
315 return IRQ_HANDLED;
a80dc3e0
JR
316}
317
431b2a20
JR
318/****************************************************************************
319 *
320 * IOMMU command queuing functions
321 *
322 ****************************************************************************/
323
324/*
325 * Writes the command to the IOMMUs command buffer and informs the
326 * hardware about the new command. Must be called with iommu->lock held.
327 */
d6449536 328static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
a19ae1ec
JR
329{
330 u32 tail, head;
331 u8 *target;
332
333 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
8a7c5ef3 334 target = iommu->cmd_buf + tail;
a19ae1ec
JR
335 memcpy_toio(target, cmd, sizeof(*cmd));
336 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
337 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
338 if (tail == head)
339 return -ENOMEM;
340 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
341
342 return 0;
343}
344
431b2a20
JR
345/*
346 * General queuing function for commands. Takes iommu->lock and calls
347 * __iommu_queue_command().
348 */
d6449536 349static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
a19ae1ec
JR
350{
351 unsigned long flags;
352 int ret;
353
354 spin_lock_irqsave(&iommu->lock, flags);
355 ret = __iommu_queue_command(iommu, cmd);
09ee17eb 356 if (!ret)
0cfd7aa9 357 iommu->need_sync = true;
a19ae1ec
JR
358 spin_unlock_irqrestore(&iommu->lock, flags);
359
360 return ret;
361}
362
8d201968
JR
363/*
364 * This function waits until an IOMMU has completed a completion
365 * wait command
366 */
367static void __iommu_wait_for_completion(struct amd_iommu *iommu)
368{
369 int ready = 0;
370 unsigned status = 0;
371 unsigned long i = 0;
372
da49f6df
JR
373 INC_STATS_COUNTER(compl_wait);
374
8d201968
JR
375 while (!ready && (i < EXIT_LOOP_COUNT)) {
376 ++i;
377 /* wait for the bit to become one */
378 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
379 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
380 }
381
382 /* set bit back to zero */
383 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
384 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
385
6a1eddd2
JR
386 if (unlikely(i == EXIT_LOOP_COUNT)) {
387 spin_unlock(&iommu->lock);
388 reset_iommu_command_buffer(iommu);
389 spin_lock(&iommu->lock);
390 }
8d201968
JR
391}
392
393/*
394 * This function queues a completion wait command into the command
395 * buffer of an IOMMU
396 */
397static int __iommu_completion_wait(struct amd_iommu *iommu)
398{
399 struct iommu_cmd cmd;
400
401 memset(&cmd, 0, sizeof(cmd));
402 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
403 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
404
405 return __iommu_queue_command(iommu, &cmd);
406}
407
431b2a20
JR
408/*
409 * This function is called whenever we need to ensure that the IOMMU has
410 * completed execution of all commands we sent. It sends a
411 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
412 * us about that by writing a value to a physical address we pass with
413 * the command.
414 */
a19ae1ec
JR
415static int iommu_completion_wait(struct amd_iommu *iommu)
416{
8d201968
JR
417 int ret = 0;
418 unsigned long flags;
a19ae1ec 419
7e4f88da
JR
420 spin_lock_irqsave(&iommu->lock, flags);
421
09ee17eb
JR
422 if (!iommu->need_sync)
423 goto out;
424
8d201968 425 ret = __iommu_completion_wait(iommu);
09ee17eb 426
0cfd7aa9 427 iommu->need_sync = false;
a19ae1ec
JR
428
429 if (ret)
7e4f88da 430 goto out;
a19ae1ec 431
8d201968 432 __iommu_wait_for_completion(iommu);
84df8175 433
7e4f88da
JR
434out:
435 spin_unlock_irqrestore(&iommu->lock, flags);
a19ae1ec
JR
436
437 return 0;
438}
439
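/*
 * Illustrative usage (sketch): commands are queued first and synced once
 * afterwards, e.g. when invalidating a device table entry:
 *
 *	iommu_queue_inv_dev_entry(iommu, devid);
 *	iommu_completion_wait(iommu);
 */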
0518a3a4
JR
440static void iommu_flush_complete(struct protection_domain *domain)
441{
442 int i;
443
444 for (i = 0; i < amd_iommus_present; ++i) {
445 if (!domain->dev_iommu[i])
446 continue;
447
448 /*
449 * Devices of this domain are behind this IOMMU
450 * We need to wait for completion of all commands.
451 */
452 iommu_completion_wait(amd_iommus[i]);
453 }
454}
455
431b2a20
JR
456/*
457 * Command send function for invalidating a device table entry
458 */
a19ae1ec
JR
459static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
460{
d6449536 461 struct iommu_cmd cmd;
ee2fa743 462 int ret;
a19ae1ec
JR
463
464 BUG_ON(iommu == NULL);
465
466 memset(&cmd, 0, sizeof(cmd));
467 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
468 cmd.data[0] = devid;
469
ee2fa743
JR
470 ret = iommu_queue_command(iommu, &cmd);
471
ee2fa743 472 return ret;
a19ae1ec
JR
473}
474
237b6f33
JR
475static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
476 u16 domid, int pde, int s)
477{
478 memset(cmd, 0, sizeof(*cmd));
479 address &= PAGE_MASK;
480 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
481 cmd->data[1] |= domid;
482 cmd->data[2] = lower_32_bits(address);
483 cmd->data[3] = upper_32_bits(address);
484 if (s) /* size bit - we flush more than one 4kb page */
485 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 486 if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
487 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
488}
489
431b2a20
JR
490/*
 491 * Generic command send function for invalidating TLB entries
492 */
a19ae1ec
JR
493static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
494 u64 address, u16 domid, int pde, int s)
495{
d6449536 496 struct iommu_cmd cmd;
ee2fa743 497 int ret;
a19ae1ec 498
237b6f33 499 __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
a19ae1ec 500
ee2fa743
JR
501 ret = iommu_queue_command(iommu, &cmd);
502
ee2fa743 503 return ret;
a19ae1ec
JR
504}
505
431b2a20
JR
506/*
507 * TLB invalidation function which is called from the mapping functions.
508 * It invalidates a single PTE if the range to flush is within a single
509 * page. Otherwise it flushes the whole TLB of the IOMMU.
510 */
6de8ad9b
JR
511static void __iommu_flush_pages(struct protection_domain *domain,
512 u64 address, size_t size, int pde)
a19ae1ec 513{
6de8ad9b 514 int s = 0, i;
dcd1e92e 515 unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
a19ae1ec
JR
516
517 address &= PAGE_MASK;
518
999ba417
JR
519 if (pages > 1) {
520 /*
521 * If we have to flush more than one page, flush all
522 * TLB entries for this domain
523 */
524 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
525 s = 1;
a19ae1ec
JR
526 }
527
999ba417 528
6de8ad9b
JR
529 for (i = 0; i < amd_iommus_present; ++i) {
530 if (!domain->dev_iommu[i])
531 continue;
532
533 /*
534 * Devices of this domain are behind this IOMMU
535 * We need a TLB flush
536 */
537 iommu_queue_inv_iommu_pages(amd_iommus[i], address,
538 domain->id, pde, s);
539 }
540
541 return;
542}
543
544static void iommu_flush_pages(struct protection_domain *domain,
545 u64 address, size_t size)
546{
547 __iommu_flush_pages(domain, address, size, 0);
a19ae1ec 548}
b6c02715 549
1c655773 550/* Flush the whole IO/TLB for a given protection domain */
dcd1e92e 551static void iommu_flush_tlb(struct protection_domain *domain)
1c655773 552{
dcd1e92e 553 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
1c655773
JR
554}
555
42a49f96 556/* Flush the whole IO/TLB for a given protection domain - including PDE */
dcd1e92e 557static void iommu_flush_tlb_pde(struct protection_domain *domain)
42a49f96 558{
dcd1e92e 559 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
42a49f96
CW
560}
561
43f49609 562/*
09b42804 563 * This function flushes all domains that have devices on the given IOMMU
43f49609 564 */
09b42804 565static void flush_all_domains_on_iommu(struct amd_iommu *iommu)
43f49609 566{
09b42804
JR
567 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
568 struct protection_domain *domain;
e394d72a 569 unsigned long flags;
18811f55 570
09b42804 571 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
bfd1be18 572
09b42804
JR
573 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
574 if (domain->dev_iommu[iommu->index] == 0)
bfd1be18 575 continue;
09b42804
JR
576
577 spin_lock(&domain->lock);
578 iommu_queue_inv_iommu_pages(iommu, address, domain->id, 1, 1);
579 iommu_flush_complete(domain);
580 spin_unlock(&domain->lock);
bfd1be18 581 }
e394d72a 582
09b42804 583 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
e394d72a
JR
584}
585
09b42804
JR
586/*
587 * This function uses heavy locking and may disable irqs for some time. But
588 * this is no issue because it is only called during resume.
589 */
bfd1be18 590void amd_iommu_flush_all_domains(void)
e394d72a 591{
e3306664 592 struct protection_domain *domain;
09b42804
JR
593 unsigned long flags;
594
595 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
e394d72a 596
e3306664 597 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
09b42804 598 spin_lock(&domain->lock);
e3306664
JR
599 iommu_flush_tlb_pde(domain);
600 iommu_flush_complete(domain);
09b42804 601 spin_unlock(&domain->lock);
e3306664 602 }
09b42804
JR
603
604 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
bfd1be18
JR
605}
606
d586d785 607static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
bfd1be18
JR
608{
609 int i;
610
d586d785
JR
611 for (i = 0; i <= amd_iommu_last_bdf; ++i) {
612 if (iommu != amd_iommu_rlookup_table[i])
bfd1be18 613 continue;
d586d785
JR
614
615 iommu_queue_inv_dev_entry(iommu, i);
616 iommu_completion_wait(iommu);
bfd1be18
JR
617 }
618}
619
6a0dbcbe 620static void flush_devices_by_domain(struct protection_domain *domain)
7d7a110c
JR
621{
622 struct amd_iommu *iommu;
623 int i;
624
625 for (i = 0; i <= amd_iommu_last_bdf; ++i) {
6a0dbcbe
JR
626 if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
627 (amd_iommu_pd_table[i] != domain))
7d7a110c
JR
628 continue;
629
630 iommu = amd_iommu_rlookup_table[i];
631 if (!iommu)
632 continue;
633
634 iommu_queue_inv_dev_entry(iommu, i);
635 iommu_completion_wait(iommu);
636 }
637}
638
a345b23b
JR
639static void reset_iommu_command_buffer(struct amd_iommu *iommu)
640{
641 pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
642
b26e81b8
JR
643 if (iommu->reset_in_progress)
644 panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
645
646 iommu->reset_in_progress = true;
647
a345b23b
JR
648 amd_iommu_reset_cmd_buffer(iommu);
649 flush_all_devices_for_iommu(iommu);
650 flush_all_domains_on_iommu(iommu);
b26e81b8
JR
651
652 iommu->reset_in_progress = false;
a345b23b
JR
653}
654
6a0dbcbe
JR
655void amd_iommu_flush_all_devices(void)
656{
657 flush_devices_by_domain(NULL);
658}
659
431b2a20
JR
660/****************************************************************************
661 *
 662 * The functions below are used to create the page table mappings for
663 * unity mapped regions.
664 *
665 ****************************************************************************/
666
667/*
 668 * Generic mapping function. It maps a physical address into a DMA
669 * address space. It allocates the page table pages if necessary.
670 * In the future it can be extended to a generic mapping function
671 * supporting all features of AMD IOMMU page tables like level skipping
672 * and full 64 bit address spaces.
673 */
38e817fe
JR
674static int iommu_map_page(struct protection_domain *dom,
675 unsigned long bus_addr,
676 unsigned long phys_addr,
abdc5eb3
JR
677 int prot,
678 int map_size)
bd0e5211 679{
8bda3092 680 u64 __pte, *pte;
bd0e5211
JR
681
682 bus_addr = PAGE_ALIGN(bus_addr);
bb9d4ff8 683 phys_addr = PAGE_ALIGN(phys_addr);
bd0e5211 684
abdc5eb3
JR
685 BUG_ON(!PM_ALIGNED(map_size, bus_addr));
686 BUG_ON(!PM_ALIGNED(map_size, phys_addr));
687
bad1cac2 688 if (!(prot & IOMMU_PROT_MASK))
bd0e5211
JR
689 return -EINVAL;
690
abdc5eb3 691 pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL);
bd0e5211
JR
692
 if (!pte)
 return -ENOMEM;

 693 if (IOMMU_PTE_PRESENT(*pte))
694 return -EBUSY;
695
696 __pte = phys_addr | IOMMU_PTE_P;
697 if (prot & IOMMU_PROT_IR)
698 __pte |= IOMMU_PTE_IR;
699 if (prot & IOMMU_PROT_IW)
700 __pte |= IOMMU_PTE_IW;
701
702 *pte = __pte;
703
04bfdd84
JR
704 update_domain(dom);
705
bd0e5211
JR
706 return 0;
707}
708
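/*
 * Illustrative usage (sketch): identity-mapping a single 4kb page
 * read/write, similar to what the unity mapping code below does:
 *
 *	ret = iommu_map_page(&dma_dom->domain, addr, addr,
 *			     IOMMU_PROT_IR | IOMMU_PROT_IW, PM_MAP_4k);
 */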
eb74ff6c 709static void iommu_unmap_page(struct protection_domain *dom,
a6b256b4 710 unsigned long bus_addr, int map_size)
eb74ff6c 711{
a6b256b4 712 u64 *pte = fetch_pte(dom, bus_addr, map_size);
eb74ff6c 713
38a76eee
JR
714 if (pte)
715 *pte = 0;
eb74ff6c 716}
eb74ff6c 717
431b2a20
JR
718/*
719 * This function checks if a specific unity mapping entry is needed for
720 * this specific IOMMU.
721 */
bd0e5211
JR
722static int iommu_for_unity_map(struct amd_iommu *iommu,
723 struct unity_map_entry *entry)
724{
725 u16 bdf, i;
726
727 for (i = entry->devid_start; i <= entry->devid_end; ++i) {
728 bdf = amd_iommu_alias_table[i];
729 if (amd_iommu_rlookup_table[bdf] == iommu)
730 return 1;
731 }
732
733 return 0;
734}
735
431b2a20
JR
736/*
737 * Init the unity mappings for a specific IOMMU in the system
738 *
739 * Basically iterates over all unity mapping entries and applies them to
740 * the default domain DMA of that IOMMU if necessary.
741 */
bd0e5211
JR
742static int iommu_init_unity_mappings(struct amd_iommu *iommu)
743{
744 struct unity_map_entry *entry;
745 int ret;
746
747 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
748 if (!iommu_for_unity_map(iommu, entry))
749 continue;
750 ret = dma_ops_unity_map(iommu->default_dom, entry);
751 if (ret)
752 return ret;
753 }
754
755 return 0;
756}
757
431b2a20
JR
758/*
759 * This function actually applies the mapping to the page table of the
760 * dma_ops domain.
761 */
bd0e5211
JR
762static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
763 struct unity_map_entry *e)
764{
765 u64 addr;
766 int ret;
767
768 for (addr = e->address_start; addr < e->address_end;
769 addr += PAGE_SIZE) {
abdc5eb3
JR
770 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
771 PM_MAP_4k);
bd0e5211
JR
772 if (ret)
773 return ret;
774 /*
775 * if unity mapping is in aperture range mark the page
776 * as allocated in the aperture
777 */
778 if (addr < dma_dom->aperture_size)
c3239567 779 __set_bit(addr >> PAGE_SHIFT,
384de729 780 dma_dom->aperture[0]->bitmap);
bd0e5211
JR
781 }
782
783 return 0;
784}
785
431b2a20
JR
786/*
787 * Inits the unity mappings required for a specific device
788 */
bd0e5211
JR
789static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
790 u16 devid)
791{
792 struct unity_map_entry *e;
793 int ret;
794
795 list_for_each_entry(e, &amd_iommu_unity_map, list) {
796 if (!(devid >= e->devid_start && devid <= e->devid_end))
797 continue;
798 ret = dma_ops_unity_map(dma_dom, e);
799 if (ret)
800 return ret;
801 }
802
803 return 0;
804}
805
431b2a20
JR
806/****************************************************************************
807 *
808 * The next functions belong to the address allocator for the dma_ops
809 * interface functions. They work like the allocators in the other IOMMU
 810 * drivers. It's basically a bitmap which marks the allocated pages in
811 * the aperture. Maybe it could be enhanced in the future to a more
812 * efficient allocator.
813 *
814 ****************************************************************************/
d3086444 815
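/*
 * Illustrative sketch (assuming APERTURE_RANGE_SIZE covers 128 MB split
 * into 4kb pages): a DMA address is located in the allocator bitmaps as
 *
 *	range = address >> APERTURE_RANGE_SHIFT;
 *	bit   = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
 *
 * and bit 'bit' of dom->aperture[range]->bitmap tells whether that page
 * is allocated.
 */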
431b2a20 816/*
384de729 817 * The address allocator core functions.
431b2a20
JR
818 *
819 * called with domain->lock held
820 */
384de729 821
00cd122a
JR
822/*
823 * This function checks if there is a PTE for a given dma address. If
824 * there is one, it returns the pointer to it.
825 */
9355a081 826static u64 *fetch_pte(struct protection_domain *domain,
a6b256b4 827 unsigned long address, int map_size)
00cd122a 828{
9355a081 829 int level;
00cd122a
JR
830 u64 *pte;
831
9355a081
JR
832 level = domain->mode - 1;
833 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
00cd122a 834
a6b256b4 835 while (level > map_size) {
9355a081
JR
836 if (!IOMMU_PTE_PRESENT(*pte))
837 return NULL;
00cd122a 838
9355a081 839 level -= 1;
00cd122a 840
9355a081
JR
841 pte = IOMMU_PTE_PAGE(*pte);
842 pte = &pte[PM_LEVEL_INDEX(level, address)];
00cd122a 843
a6b256b4
JR
844 if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
845 pte = NULL;
846 break;
847 }
9355a081 848 }
00cd122a
JR
849
850 return pte;
851}
852
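/*
 * Illustrative note (assumption: PM_LEVEL_INDEX() selects nine address
 * bits per level above the 12-bit page offset): a 3-level walk for an
 * address A uses the indices
 *
 *	level 2: A[38:30]	level 1: A[29:21]	level 0: A[20:12]
 *
 * fetch_pte() stops the walk early at map_size or at a non-present entry.
 */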
9cabe89b
JR
853/*
854 * This function is used to add a new aperture range to an existing
855 * aperture in case of dma_ops domain allocation or address allocation
856 * failure.
857 */
576175c2 858static int alloc_new_range(struct dma_ops_domain *dma_dom,
9cabe89b
JR
859 bool populate, gfp_t gfp)
860{
861 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
576175c2 862 struct amd_iommu *iommu;
00cd122a 863 int i;
9cabe89b 864
f5e9705c
JR
865#ifdef CONFIG_IOMMU_STRESS
866 populate = false;
867#endif
868
9cabe89b
JR
869 if (index >= APERTURE_MAX_RANGES)
870 return -ENOMEM;
871
872 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
873 if (!dma_dom->aperture[index])
874 return -ENOMEM;
875
876 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
877 if (!dma_dom->aperture[index]->bitmap)
878 goto out_free;
879
880 dma_dom->aperture[index]->offset = dma_dom->aperture_size;
881
882 if (populate) {
883 unsigned long address = dma_dom->aperture_size;
884 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
885 u64 *pte, *pte_page;
886
887 for (i = 0; i < num_ptes; ++i) {
abdc5eb3 888 pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k,
9cabe89b
JR
889 &pte_page, gfp);
890 if (!pte)
891 goto out_free;
892
893 dma_dom->aperture[index]->pte_pages[i] = pte_page;
894
895 address += APERTURE_RANGE_SIZE / 64;
896 }
897 }
898
899 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
900
00cd122a 901 /* Initialize the exclusion range if necessary */
576175c2
JR
902 for_each_iommu(iommu) {
903 if (iommu->exclusion_start &&
904 iommu->exclusion_start >= dma_dom->aperture[index]->offset
905 && iommu->exclusion_start < dma_dom->aperture_size) {
906 unsigned long startpage;
907 int pages = iommu_num_pages(iommu->exclusion_start,
908 iommu->exclusion_length,
909 PAGE_SIZE);
910 startpage = iommu->exclusion_start >> PAGE_SHIFT;
911 dma_ops_reserve_addresses(dma_dom, startpage, pages);
912 }
00cd122a
JR
913 }
914
915 /*
916 * Check for areas already mapped as present in the new aperture
917 * range and mark those pages as reserved in the allocator. Such
918 * mappings may already exist as a result of requested unity
919 * mappings for devices.
920 */
921 for (i = dma_dom->aperture[index]->offset;
922 i < dma_dom->aperture_size;
923 i += PAGE_SIZE) {
a6b256b4 924 u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k);
00cd122a
JR
925 if (!pte || !IOMMU_PTE_PRESENT(*pte))
926 continue;
927
928 dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
929 }
930
04bfdd84
JR
931 update_domain(&dma_dom->domain);
932
9cabe89b
JR
933 return 0;
934
935out_free:
04bfdd84
JR
936 update_domain(&dma_dom->domain);
937
9cabe89b
JR
938 free_page((unsigned long)dma_dom->aperture[index]->bitmap);
939
940 kfree(dma_dom->aperture[index]);
941 dma_dom->aperture[index] = NULL;
942
943 return -ENOMEM;
944}
945
384de729
JR
946static unsigned long dma_ops_area_alloc(struct device *dev,
947 struct dma_ops_domain *dom,
948 unsigned int pages,
949 unsigned long align_mask,
950 u64 dma_mask,
951 unsigned long start)
952{
803b8cb4 953 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
384de729
JR
954 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
955 int i = start >> APERTURE_RANGE_SHIFT;
956 unsigned long boundary_size;
957 unsigned long address = -1;
958 unsigned long limit;
959
803b8cb4
JR
960 next_bit >>= PAGE_SHIFT;
961
384de729
JR
962 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
963 PAGE_SIZE) >> PAGE_SHIFT;
964
965 for (;i < max_index; ++i) {
966 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
967
968 if (dom->aperture[i]->offset >= dma_mask)
969 break;
970
971 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
972 dma_mask >> PAGE_SHIFT);
973
974 address = iommu_area_alloc(dom->aperture[i]->bitmap,
975 limit, next_bit, pages, 0,
976 boundary_size, align_mask);
977 if (address != -1) {
978 address = dom->aperture[i]->offset +
979 (address << PAGE_SHIFT);
803b8cb4 980 dom->next_address = address + (pages << PAGE_SHIFT);
384de729
JR
981 break;
982 }
983
984 next_bit = 0;
985 }
986
987 return address;
988}
989
d3086444
JR
990static unsigned long dma_ops_alloc_addresses(struct device *dev,
991 struct dma_ops_domain *dom,
6d4f343f 992 unsigned int pages,
832a90c3
JR
993 unsigned long align_mask,
994 u64 dma_mask)
d3086444 995{
d3086444 996 unsigned long address;
d3086444 997
fe16f088
JR
998#ifdef CONFIG_IOMMU_STRESS
999 dom->next_address = 0;
1000 dom->need_flush = true;
1001#endif
d3086444 1002
384de729 1003 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
803b8cb4 1004 dma_mask, dom->next_address);
d3086444 1005
1c655773 1006 if (address == -1) {
803b8cb4 1007 dom->next_address = 0;
384de729
JR
1008 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1009 dma_mask, 0);
1c655773
JR
1010 dom->need_flush = true;
1011 }
d3086444 1012
384de729 1013 if (unlikely(address == -1))
8fd524b3 1014 address = DMA_ERROR_CODE;
d3086444
JR
1015
1016 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
1017
1018 return address;
1019}
1020
431b2a20
JR
1021/*
1022 * The address free function.
1023 *
1024 * called with domain->lock held
1025 */
d3086444
JR
1026static void dma_ops_free_addresses(struct dma_ops_domain *dom,
1027 unsigned long address,
1028 unsigned int pages)
1029{
384de729
JR
1030 unsigned i = address >> APERTURE_RANGE_SHIFT;
1031 struct aperture_range *range = dom->aperture[i];
80be308d 1032
384de729
JR
1033 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
1034
47bccd6b
JR
1035#ifdef CONFIG_IOMMU_STRESS
1036 if (i < 4)
1037 return;
1038#endif
80be308d 1039
803b8cb4 1040 if (address >= dom->next_address)
80be308d 1041 dom->need_flush = true;
384de729
JR
1042
1043 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
803b8cb4 1044
384de729
JR
1045 iommu_area_free(range->bitmap, address, pages);
1046
d3086444
JR
1047}
1048
431b2a20
JR
1049/****************************************************************************
1050 *
1051 * The next functions belong to the domain allocation. A domain is
1052 * allocated for every IOMMU as the default domain. If device isolation
 1053 * is enabled, every device gets its own domain. The most important thing
1054 * about domains is the page table mapping the DMA address space they
1055 * contain.
1056 *
1057 ****************************************************************************/
1058
aeb26f55
JR
1059/*
1060 * This function adds a protection domain to the global protection domain list
1061 */
1062static void add_domain_to_list(struct protection_domain *domain)
1063{
1064 unsigned long flags;
1065
1066 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1067 list_add(&domain->list, &amd_iommu_pd_list);
1068 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1069}
1070
1071/*
 1072 * This function removes a protection domain from the global
1073 * protection domain list
1074 */
1075static void del_domain_from_list(struct protection_domain *domain)
1076{
1077 unsigned long flags;
1078
1079 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1080 list_del(&domain->list);
1081 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1082}
1083
ec487d1a
JR
1084static u16 domain_id_alloc(void)
1085{
1086 unsigned long flags;
1087 int id;
1088
1089 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1090 id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1091 BUG_ON(id == 0);
1092 if (id > 0 && id < MAX_DOMAIN_ID)
1093 __set_bit(id, amd_iommu_pd_alloc_bitmap);
1094 else
1095 id = 0;
1096 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1097
1098 return id;
1099}
1100
a2acfb75
JR
1101static void domain_id_free(int id)
1102{
1103 unsigned long flags;
1104
1105 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1106 if (id > 0 && id < MAX_DOMAIN_ID)
1107 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
1108 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1109}
a2acfb75 1110
431b2a20
JR
1111/*
1112 * Used to reserve address ranges in the aperture (e.g. for exclusion
1113 * ranges.
1114 */
ec487d1a
JR
1115static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1116 unsigned long start_page,
1117 unsigned int pages)
1118{
384de729 1119 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
ec487d1a
JR
1120
1121 if (start_page + pages > last_page)
1122 pages = last_page - start_page;
1123
384de729
JR
1124 for (i = start_page; i < start_page + pages; ++i) {
1125 int index = i / APERTURE_RANGE_PAGES;
1126 int page = i % APERTURE_RANGE_PAGES;
1127 __set_bit(page, dom->aperture[index]->bitmap);
1128 }
ec487d1a
JR
1129}
1130
86db2e5d 1131static void free_pagetable(struct protection_domain *domain)
ec487d1a
JR
1132{
1133 int i, j;
1134 u64 *p1, *p2, *p3;
1135
86db2e5d 1136 p1 = domain->pt_root;
ec487d1a
JR
1137
1138 if (!p1)
1139 return;
1140
1141 for (i = 0; i < 512; ++i) {
1142 if (!IOMMU_PTE_PRESENT(p1[i]))
1143 continue;
1144
1145 p2 = IOMMU_PTE_PAGE(p1[i]);
3cc3d84b 1146 for (j = 0; j < 512; ++j) {
ec487d1a
JR
1147 if (!IOMMU_PTE_PRESENT(p2[j]))
1148 continue;
1149 p3 = IOMMU_PTE_PAGE(p2[j]);
1150 free_page((unsigned long)p3);
1151 }
1152
1153 free_page((unsigned long)p2);
1154 }
1155
1156 free_page((unsigned long)p1);
86db2e5d
JR
1157
1158 domain->pt_root = NULL;
ec487d1a
JR
1159}
1160
431b2a20
JR
1161/*
1162 * Free a domain, only used if something went wrong in the
1163 * allocation path and we need to free an already allocated page table
1164 */
ec487d1a
JR
1165static void dma_ops_domain_free(struct dma_ops_domain *dom)
1166{
384de729
JR
1167 int i;
1168
ec487d1a
JR
1169 if (!dom)
1170 return;
1171
aeb26f55
JR
1172 del_domain_from_list(&dom->domain);
1173
86db2e5d 1174 free_pagetable(&dom->domain);
ec487d1a 1175
384de729
JR
1176 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1177 if (!dom->aperture[i])
1178 continue;
1179 free_page((unsigned long)dom->aperture[i]->bitmap);
1180 kfree(dom->aperture[i]);
1181 }
ec487d1a
JR
1182
1183 kfree(dom);
1184}
1185
431b2a20
JR
1186/*
1187 * Allocates a new protection domain usable for the dma_ops functions.
 1188 * It also initializes the page table and the address allocator data
1189 * structures required for the dma_ops interface
1190 */
d9cfed92 1191static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
ec487d1a
JR
1192{
1193 struct dma_ops_domain *dma_dom;
ec487d1a
JR
1194
1195 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1196 if (!dma_dom)
1197 return NULL;
1198
1199 spin_lock_init(&dma_dom->domain.lock);
1200
1201 dma_dom->domain.id = domain_id_alloc();
1202 if (dma_dom->domain.id == 0)
1203 goto free_dma_dom;
8f7a017c 1204 dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
ec487d1a 1205 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
9fdb19d6 1206 dma_dom->domain.flags = PD_DMA_OPS_MASK;
ec487d1a
JR
1207 dma_dom->domain.priv = dma_dom;
1208 if (!dma_dom->domain.pt_root)
1209 goto free_dma_dom;
ec487d1a 1210
1c655773 1211 dma_dom->need_flush = false;
bd60b735 1212 dma_dom->target_dev = 0xffff;
1c655773 1213
aeb26f55
JR
1214 add_domain_to_list(&dma_dom->domain);
1215
576175c2 1216 if (alloc_new_range(dma_dom, true, GFP_KERNEL))
ec487d1a 1217 goto free_dma_dom;
ec487d1a 1218
431b2a20 1219 /*
ec487d1a
JR
 1220 * mark the first page as allocated so we never return 0 as
 1221 * a valid dma-address; this allows us to use 0 as an error value
431b2a20 1222 */
384de729 1223 dma_dom->aperture[0]->bitmap[0] = 1;
803b8cb4 1224 dma_dom->next_address = 0;
ec487d1a 1225
ec487d1a
JR
1226
1227 return dma_dom;
1228
1229free_dma_dom:
1230 dma_ops_domain_free(dma_dom);
1231
1232 return NULL;
1233}
1234
5b28df6f
JR
1235/*
1236 * little helper function to check whether a given protection domain is a
1237 * dma_ops domain
1238 */
1239static bool dma_ops_domain(struct protection_domain *domain)
1240{
1241 return domain->flags & PD_DMA_OPS_MASK;
1242}
1243
407d733e 1244static void set_dte_entry(u16 devid, struct protection_domain *domain)
b20ac0d4 1245{
15898bbc 1246 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
b20ac0d4 1247 u64 pte_root = virt_to_phys(domain->pt_root);
863c74eb 1248
15898bbc
JR
1249 BUG_ON(amd_iommu_pd_table[devid] != NULL);
1250
38ddf41b
JR
1251 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1252 << DEV_ENTRY_MODE_SHIFT;
1253 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
b20ac0d4 1254
b20ac0d4 1255 amd_iommu_dev_table[devid].data[2] = domain->id;
aa879fff
JR
1256 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1257 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
b20ac0d4
JR
1258
1259 amd_iommu_pd_table[devid] = domain;
15898bbc
JR
1260
1261 /* Do reference counting */
1262 domain->dev_iommu[iommu->index] += 1;
1263 domain->dev_cnt += 1;
1264
1265 /* Flush the changes DTE entry */
1266 iommu_queue_inv_dev_entry(iommu, devid);
1267}
1268
1269static void clear_dte_entry(u16 devid)
1270{
1271 struct protection_domain *domain = amd_iommu_pd_table[devid];
1272 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1273
1274 BUG_ON(domain == NULL);
1275
1276 /* remove domain from the lookup table */
1277 amd_iommu_pd_table[devid] = NULL;
1278
1279 /* remove entry from the device table seen by the hardware */
1280 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1281 amd_iommu_dev_table[devid].data[1] = 0;
1282 amd_iommu_dev_table[devid].data[2] = 0;
1283
1284 amd_iommu_apply_erratum_63(devid);
1285
1286 /* decrease reference counters */
1287 domain->dev_iommu[iommu->index] -= 1;
1288 domain->dev_cnt -= 1;
1289
1290 iommu_queue_inv_dev_entry(iommu, devid);
2b681faf
JR
1291}
1292
1293/*
 1294 * If a device is not yet associated with a domain, this function
 1295 * associates it with the domain and makes it visible to the hardware
1296 */
15898bbc
JR
1297static int __attach_device(struct device *dev,
1298 struct protection_domain *domain)
2b681faf 1299{
15898bbc
JR
1300 u16 devid = get_device_id(dev);
1301 u16 alias = amd_iommu_alias_table[devid];
1302
2b681faf
JR
1303 /* lock domain */
1304 spin_lock(&domain->lock);
1305
15898bbc
JR
1306 /* Some sanity checks */
 1307 if (amd_iommu_pd_table[alias] != NULL &&
 1308 amd_iommu_pd_table[alias] != domain) {
 spin_unlock(&domain->lock);
 1309 return -EBUSY;
 }
eba6ac60 1310
15898bbc
JR
 1311 if (amd_iommu_pd_table[devid] != NULL &&
 1312 amd_iommu_pd_table[devid] != domain) {
 spin_unlock(&domain->lock);
 1313 return -EBUSY;
 }
1314
1315 /* Do real assignment */
1316 if (alias != devid &&
1317 amd_iommu_pd_table[alias] == NULL)
1318 set_dte_entry(alias, domain);
1319
1320 if (amd_iommu_pd_table[devid] == NULL)
1321 set_dte_entry(devid, domain);
eba6ac60
JR
1322
1323 /* ready */
1324 spin_unlock(&domain->lock);
15898bbc
JR
1325
1326 return 0;
0feae533 1327}
b20ac0d4 1328
407d733e
JR
1329/*
 1330 * If a device is not yet associated with a domain, this function
 1331 * associates it with the domain and makes it visible to the hardware
1332 */
15898bbc
JR
1333static int attach_device(struct device *dev,
1334 struct protection_domain *domain)
0feae533 1335{
eba6ac60 1336 unsigned long flags;
15898bbc 1337 int ret;
eba6ac60
JR
1338
1339 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
15898bbc 1340 ret = __attach_device(dev, domain);
b20ac0d4
JR
1341 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1342
0feae533
JR
1343 /*
1344 * We might boot into a crash-kernel here. The crashed kernel
1345 * left the caches in the IOMMU dirty. So we have to flush
1346 * here to evict all dirty stuff.
1347 */
dcd1e92e 1348 iommu_flush_tlb_pde(domain);
15898bbc
JR
1349
1350 return ret;
b20ac0d4
JR
1351}
1352
355bf553
JR
1353/*
1354 * Removes a device from a protection domain (unlocked)
1355 */
15898bbc 1356static void __detach_device(struct device *dev)
355bf553 1357{
15898bbc 1358 u16 devid = get_device_id(dev);
c4596114
JR
1359 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1360
1361 BUG_ON(!iommu);
355bf553 1362
15898bbc 1363 clear_dte_entry(devid);
21129f78
JR
1364
1365 /*
1366 * If we run in passthrough mode the device must be assigned to the
1367 * passthrough domain if it is detached from any other domain
1368 */
15898bbc
JR
1369 if (iommu_pass_through)
1370 __attach_device(dev, pt_domain);
355bf553
JR
1371}
1372
1373/*
1374 * Removes a device from a protection domain (with devtable_lock held)
1375 */
15898bbc 1376static void detach_device(struct device *dev)
355bf553
JR
1377{
1378 unsigned long flags;
1379
1380 /* lock device table */
1381 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
15898bbc 1382 __detach_device(dev);
355bf553
JR
1383 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1384}
e275a2a0 1385
15898bbc
JR
1386/*
1387 * Find out the protection domain structure for a given PCI device. This
1388 * will give us the pointer to the page table root for example.
1389 */
1390static struct protection_domain *domain_for_device(struct device *dev)
1391{
1392 struct protection_domain *dom;
1393 unsigned long flags;
1394 u16 devid, alias;
1395
1396 devid = get_device_id(dev);
1397 alias = amd_iommu_alias_table[devid];
1398
1399 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1400 dom = amd_iommu_pd_table[devid];
1401 if (dom == NULL &&
1402 amd_iommu_pd_table[alias] != NULL) {
1403 __attach_device(dev, amd_iommu_pd_table[alias]);
1404 dom = amd_iommu_pd_table[devid];
1405 }
1406
1407 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1408
1409 return dom;
1410}
1411
e275a2a0
JR
1412static int device_change_notifier(struct notifier_block *nb,
1413 unsigned long action, void *data)
1414{
1415 struct device *dev = data;
98fc5a69 1416 u16 devid;
e275a2a0
JR
1417 struct protection_domain *domain;
1418 struct dma_ops_domain *dma_domain;
1419 struct amd_iommu *iommu;
1ac4cbbc 1420 unsigned long flags;
e275a2a0 1421
98fc5a69
JR
1422 if (!check_device(dev))
1423 return 0;
e275a2a0 1424
98fc5a69
JR
1425 devid = get_device_id(dev);
1426 iommu = amd_iommu_rlookup_table[devid];
15898bbc 1427 domain = domain_for_device(dev);
e275a2a0
JR
1428
1429 if (domain && !dma_ops_domain(domain))
1430 WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
1431 "to a non-dma-ops domain\n", dev_name(dev));
1432
1433 switch (action) {
c1eee67b 1434 case BUS_NOTIFY_UNBOUND_DRIVER:
e275a2a0
JR
1435 if (!domain)
1436 goto out;
a1ca331c
JR
1437 if (iommu_pass_through)
1438 break;
15898bbc 1439 detach_device(dev);
1ac4cbbc
JR
1440 break;
1441 case BUS_NOTIFY_ADD_DEVICE:
1442 /* allocate a protection domain if a device is added */
1443 dma_domain = find_protection_domain(devid);
1444 if (dma_domain)
1445 goto out;
d9cfed92 1446 dma_domain = dma_ops_domain_alloc(iommu);
1ac4cbbc
JR
1447 if (!dma_domain)
1448 goto out;
1449 dma_domain->target_dev = devid;
1450
1451 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1452 list_add_tail(&dma_domain->list, &iommu_pd_list);
1453 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1454
e275a2a0
JR
1455 break;
1456 default:
1457 goto out;
1458 }
1459
1460 iommu_queue_inv_dev_entry(iommu, devid);
1461 iommu_completion_wait(iommu);
1462
1463out:
1464 return 0;
1465}
1466
b25ae679 1467static struct notifier_block device_nb = {
e275a2a0
JR
1468 .notifier_call = device_change_notifier,
1469};
355bf553 1470
431b2a20
JR
1471/*****************************************************************************
1472 *
1473 * The next functions belong to the dma_ops mapping/unmapping code.
1474 *
1475 *****************************************************************************/
1476
1477/*
1478 * In the dma_ops path we only have the struct device. This function
1479 * finds the corresponding IOMMU, the protection domain and the
1480 * requestor id for a given device.
1481 * If the device is not yet associated with a domain this is also done
1482 * in this function.
1483 */
94f6d190 1484static struct protection_domain *get_domain(struct device *dev)
b20ac0d4 1485{
94f6d190 1486 struct protection_domain *domain;
b20ac0d4 1487 struct dma_ops_domain *dma_dom;
94f6d190 1488 u16 devid = get_device_id(dev);
b20ac0d4 1489
f99c0f1c 1490 if (!check_device(dev))
94f6d190 1491 return ERR_PTR(-EINVAL);
b20ac0d4 1492
94f6d190
JR
1493 domain = domain_for_device(dev);
1494 if (domain != NULL && !dma_ops_domain(domain))
1495 return ERR_PTR(-EBUSY);
f99c0f1c 1496
94f6d190
JR
1497 if (domain != NULL)
1498 return domain;
b20ac0d4 1499
15898bbc 1500 /* Device not bound yet - bind it */
94f6d190 1501 dma_dom = find_protection_domain(devid);
15898bbc 1502 if (!dma_dom)
94f6d190
JR
1503 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1504 attach_device(dev, &dma_dom->domain);
15898bbc 1505 DUMP_printk("Using protection domain %d for device %s\n",
94f6d190 1506 dma_dom->domain.id, dev_name(dev));
f91ba190 1507
94f6d190 1508 return &dma_dom->domain;
b20ac0d4
JR
1509}
1510
04bfdd84
JR
1511static void update_device_table(struct protection_domain *domain)
1512{
2b681faf 1513 unsigned long flags;
04bfdd84
JR
1514 int i;
1515
1516 for (i = 0; i <= amd_iommu_last_bdf; ++i) {
1517 if (amd_iommu_pd_table[i] != domain)
1518 continue;
2b681faf 1519 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
04bfdd84 1520 set_dte_entry(i, domain);
2b681faf 1521 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
04bfdd84
JR
1522 }
1523}
1524
1525static void update_domain(struct protection_domain *domain)
1526{
1527 if (!domain->updated)
1528 return;
1529
1530 update_device_table(domain);
1531 flush_devices_by_domain(domain);
601367d7 1532 iommu_flush_tlb_pde(domain);
04bfdd84
JR
1533
1534 domain->updated = false;
1535}
1536
8bda3092 1537/*
50020fb6
JR
1538 * This function is used to add another level to an IO page table. Adding
1539 * another level increases the size of the address space by 9 bits to a size up
1540 * to 64 bits.
8bda3092 1541 */
50020fb6
JR
1542static bool increase_address_space(struct protection_domain *domain,
1543 gfp_t gfp)
1544{
1545 u64 *pte;
1546
1547 if (domain->mode == PAGE_MODE_6_LEVEL)
1548 /* address space already 64 bit large */
1549 return false;
1550
1551 pte = (void *)get_zeroed_page(gfp);
1552 if (!pte)
1553 return false;
1554
1555 *pte = PM_LEVEL_PDE(domain->mode,
1556 virt_to_phys(domain->pt_root));
1557 domain->pt_root = pte;
1558 domain->mode += 1;
1559 domain->updated = true;
1560
1561 return true;
1562}
1563
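/*
 * Worked example (assuming each level covers 9 address bits on top of the
 * 12-bit page offset): a domain in 3-level mode covers addresses below
 * 1ULL << 39. The first alloc_pte() call for an address at or above that
 * boundary triggers increase_address_space(), which turns the old pt_root
 * into the single entry of a new top level and bumps the domain to
 * 4-level mode (addresses below 1ULL << 48).
 */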
8bc3e127 1564static u64 *alloc_pte(struct protection_domain *domain,
abdc5eb3
JR
1565 unsigned long address,
1566 int end_lvl,
1567 u64 **pte_page,
1568 gfp_t gfp)
8bda3092
JR
1569{
1570 u64 *pte, *page;
8bc3e127 1571 int level;
8bda3092 1572
8bc3e127
JR
1573 while (address > PM_LEVEL_SIZE(domain->mode))
1574 increase_address_space(domain, gfp);
8bda3092 1575
8bc3e127
JR
1576 level = domain->mode - 1;
1577 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
8bda3092 1578
abdc5eb3 1579 while (level > end_lvl) {
8bc3e127
JR
1580 if (!IOMMU_PTE_PRESENT(*pte)) {
1581 page = (u64 *)get_zeroed_page(gfp);
1582 if (!page)
1583 return NULL;
1584 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
1585 }
8bda3092 1586
8bc3e127 1587 level -= 1;
8bda3092 1588
8bc3e127 1589 pte = IOMMU_PTE_PAGE(*pte);
8bda3092 1590
abdc5eb3 1591 if (pte_page && level == end_lvl)
8bc3e127 1592 *pte_page = pte;
8bda3092 1593
8bc3e127
JR
1594 pte = &pte[PM_LEVEL_INDEX(level, address)];
1595 }
8bda3092
JR
1596
1597 return pte;
1598}
1599
1600/*
1601 * This function fetches the PTE for a given address in the aperture
1602 */
1603static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1604 unsigned long address)
1605{
384de729 1606 struct aperture_range *aperture;
8bda3092
JR
1607 u64 *pte, *pte_page;
1608
384de729
JR
1609 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1610 if (!aperture)
1611 return NULL;
1612
1613 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
8bda3092 1614 if (!pte) {
abdc5eb3
JR
1615 pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page,
1616 GFP_ATOMIC);
384de729
JR
1617 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1618 } else
8c8c143c 1619 pte += PM_LEVEL_INDEX(0, address);
8bda3092 1620
04bfdd84 1621 update_domain(&dom->domain);
8bda3092
JR
1622
1623 return pte;
1624}
1625
431b2a20
JR
1626/*
1627 * This is the generic map function. It maps one 4kb page at paddr to
1628 * the given address in the DMA address space for the domain.
1629 */
680525e0 1630static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
cb76c322
JR
1631 unsigned long address,
1632 phys_addr_t paddr,
1633 int direction)
1634{
1635 u64 *pte, __pte;
1636
1637 WARN_ON(address > dom->aperture_size);
1638
1639 paddr &= PAGE_MASK;
1640
8bda3092 1641 pte = dma_ops_get_pte(dom, address);
53812c11 1642 if (!pte)
8fd524b3 1643 return DMA_ERROR_CODE;
cb76c322
JR
1644
1645 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1646
1647 if (direction == DMA_TO_DEVICE)
1648 __pte |= IOMMU_PTE_IR;
1649 else if (direction == DMA_FROM_DEVICE)
1650 __pte |= IOMMU_PTE_IW;
1651 else if (direction == DMA_BIDIRECTIONAL)
1652 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
1653
1654 WARN_ON(*pte);
1655
1656 *pte = __pte;
1657
1658 return (dma_addr_t)address;
1659}
1660
431b2a20
JR
1661/*
 1662 * The generic unmapping function for one page in the DMA address space.
1663 */
680525e0 1664static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
cb76c322
JR
1665 unsigned long address)
1666{
384de729 1667 struct aperture_range *aperture;
cb76c322
JR
1668 u64 *pte;
1669
1670 if (address >= dom->aperture_size)
1671 return;
1672
384de729
JR
1673 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1674 if (!aperture)
1675 return;
1676
1677 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1678 if (!pte)
1679 return;
cb76c322 1680
8c8c143c 1681 pte += PM_LEVEL_INDEX(0, address);
cb76c322
JR
1682
1683 WARN_ON(!*pte);
1684
1685 *pte = 0ULL;
1686}
1687
431b2a20
JR
1688/*
1689 * This function contains common code for mapping of a physically
24f81160
JR
1690 * contiguous memory region into DMA address space. It is used by all
1691 * mapping functions provided with this IOMMU driver.
431b2a20
JR
1692 * Must be called with the domain lock held.
1693 */
cb76c322 1694static dma_addr_t __map_single(struct device *dev,
cb76c322
JR
1695 struct dma_ops_domain *dma_dom,
1696 phys_addr_t paddr,
1697 size_t size,
6d4f343f 1698 int dir,
832a90c3
JR
1699 bool align,
1700 u64 dma_mask)
cb76c322
JR
1701{
1702 dma_addr_t offset = paddr & ~PAGE_MASK;
53812c11 1703 dma_addr_t address, start, ret;
cb76c322 1704 unsigned int pages;
6d4f343f 1705 unsigned long align_mask = 0;
cb76c322
JR
1706 int i;
1707
e3c449f5 1708 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
cb76c322
JR
1709 paddr &= PAGE_MASK;
1710
8ecaf8f1
JR
1711 INC_STATS_COUNTER(total_map_requests);
1712
c1858976
JR
1713 if (pages > 1)
1714 INC_STATS_COUNTER(cross_page);
1715
6d4f343f
JR
1716 if (align)
1717 align_mask = (1UL << get_order(size)) - 1;
1718
11b83888 1719retry:
832a90c3
JR
1720 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
1721 dma_mask);
8fd524b3 1722 if (unlikely(address == DMA_ERROR_CODE)) {
11b83888
JR
1723 /*
1724 * setting next_address here will let the address
 1725 * allocator only scan the newly allocated range in the
1726 * first run. This is a small optimization.
1727 */
1728 dma_dom->next_address = dma_dom->aperture_size;
1729
576175c2 1730 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
11b83888
JR
1731 goto out;
1732
1733 /*
 1734 * aperture was successfully enlarged by 128 MB, try
1735 * allocation again
1736 */
1737 goto retry;
1738 }
cb76c322
JR
1739
1740 start = address;
1741 for (i = 0; i < pages; ++i) {
680525e0 1742 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
8fd524b3 1743 if (ret == DMA_ERROR_CODE)
53812c11
JR
1744 goto out_unmap;
1745
cb76c322
JR
1746 paddr += PAGE_SIZE;
1747 start += PAGE_SIZE;
1748 }
1749 address += offset;
1750
5774f7c5
JR
1751 ADD_STATS_COUNTER(alloced_io_mem, size);
1752
afa9fdc2 1753 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
dcd1e92e 1754 iommu_flush_tlb(&dma_dom->domain);
1c655773 1755 dma_dom->need_flush = false;
318afd41 1756 } else if (unlikely(amd_iommu_np_cache))
6de8ad9b 1757 iommu_flush_pages(&dma_dom->domain, address, size);
270cab24 1758
cb76c322
JR
1759out:
1760 return address;
53812c11
JR
1761
1762out_unmap:
1763
1764 for (--i; i >= 0; --i) {
1765 start -= PAGE_SIZE;
680525e0 1766 dma_ops_domain_unmap(dma_dom, start);
53812c11
JR
1767 }
1768
1769 dma_ops_free_addresses(dma_dom, address, pages);
1770
8fd524b3 1771 return DMA_ERROR_CODE;
cb76c322
JR
1772}
1773
431b2a20
JR
1774/*
1775 * Does the reverse of the __map_single function. Must be called with
1776 * the domain lock held too
1777 */
cd8c82e8 1778static void __unmap_single(struct dma_ops_domain *dma_dom,
cb76c322
JR
1779 dma_addr_t dma_addr,
1780 size_t size,
1781 int dir)
1782{
1783 dma_addr_t i, start;
1784 unsigned int pages;
1785
8fd524b3 1786 if ((dma_addr == DMA_ERROR_CODE) ||
b8d9905d 1787 (dma_addr + size > dma_dom->aperture_size))
cb76c322
JR
1788 return;
1789
e3c449f5 1790 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
cb76c322
JR
1791 dma_addr &= PAGE_MASK;
1792 start = dma_addr;
1793
1794 for (i = 0; i < pages; ++i) {
680525e0 1795 dma_ops_domain_unmap(dma_dom, start);
cb76c322
JR
1796 start += PAGE_SIZE;
1797 }
1798
5774f7c5
JR
1799 SUB_STATS_COUNTER(alloced_io_mem, size);
1800
cb76c322 1801 dma_ops_free_addresses(dma_dom, dma_addr, pages);
270cab24 1802
80be308d 1803 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
6de8ad9b 1804 iommu_flush_pages(&dma_dom->domain, dma_addr, size);
80be308d
JR
1805 dma_dom->need_flush = false;
1806 }
cb76c322
JR
1807}
1808
431b2a20
JR
1809/*
1810 * The exported map_single function for dma_ops.
1811 */
51491367
FT
1812static dma_addr_t map_page(struct device *dev, struct page *page,
1813 unsigned long offset, size_t size,
1814 enum dma_data_direction dir,
1815 struct dma_attrs *attrs)
4da70b9e
JR
1816{
1817 unsigned long flags;
4da70b9e 1818 struct protection_domain *domain;
4da70b9e 1819 dma_addr_t addr;
832a90c3 1820 u64 dma_mask;
51491367 1821 phys_addr_t paddr = page_to_phys(page) + offset;
4da70b9e 1822
0f2a86f2
JR
1823 INC_STATS_COUNTER(cnt_map_single);
1824
94f6d190
JR
1825 domain = get_domain(dev);
1826 if (PTR_ERR(domain) == -EINVAL)
4da70b9e 1827 return (dma_addr_t)paddr;
94f6d190
JR
1828 else if (IS_ERR(domain))
1829 return DMA_ERROR_CODE;
4da70b9e 1830
f99c0f1c
JR
1831 dma_mask = *dev->dma_mask;
1832
4da70b9e 1833 spin_lock_irqsave(&domain->lock, flags);
94f6d190 1834
cd8c82e8 1835 addr = __map_single(dev, domain->priv, paddr, size, dir, false,
832a90c3 1836 dma_mask);
8fd524b3 1837 if (addr == DMA_ERROR_CODE)
4da70b9e
JR
1838 goto out;
1839
0518a3a4 1840 iommu_flush_complete(domain);
4da70b9e
JR
1841
1842out:
1843 spin_unlock_irqrestore(&domain->lock, flags);
1844
1845 return addr;
1846}
1847
431b2a20
JR
1848/*
 1849 * The exported unmap_page function for dma_ops.
1850 */
51491367
FT
1851static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
1852 enum dma_data_direction dir, struct dma_attrs *attrs)
4da70b9e
JR
1853{
1854 unsigned long flags;
4da70b9e 1855 struct protection_domain *domain;
4da70b9e 1856
146a6917
JR
1857 INC_STATS_COUNTER(cnt_unmap_single);
1858
94f6d190
JR
1859 domain = get_domain(dev);
1860 if (IS_ERR(domain))
5b28df6f
JR
1861 return;
1862
4da70b9e
JR
1863 spin_lock_irqsave(&domain->lock, flags);
1864
cd8c82e8 1865 __unmap_single(domain->priv, dma_addr, size, dir);
4da70b9e 1866
0518a3a4 1867 iommu_flush_complete(domain);
4da70b9e
JR
1868
1869 spin_unlock_irqrestore(&domain->lock, flags);
1870}
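/*
 * Illustrative sketch (not part of this file): roughly how a hypothetical
 * PCI driver would exercise the map_page()/unmap_page() callbacks above
 * through the generic streaming DMA API. The function name, buffer and
 * length are made up for illustration.
 */
#include <linux/dma-mapping.h>

static int example_send_buffer(struct device *dev, struct page *page,
			       unsigned long offset, size_t len)
{
	dma_addr_t handle;

	/* Dispatches to map_page() above when dma_ops == &amd_iommu_dma_ops */
	handle = dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;

	/* ... program the device with 'handle' and wait for completion ... */

	/* Dispatches to unmap_page() above */
	dma_unmap_page(dev, handle, len, DMA_TO_DEVICE);

	return 0;
}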
1871
431b2a20
JR
1872/*
 1873 * This is a special map_sg function which is used if we have to map a
 1874 * device that is not handled by an AMD IOMMU in the system.
1875 */
65b050ad
JR
1876static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
1877 int nelems, int dir)
1878{
1879 struct scatterlist *s;
1880 int i;
1881
1882 for_each_sg(sglist, s, nelems, i) {
1883 s->dma_address = (dma_addr_t)sg_phys(s);
1884 s->dma_length = s->length;
1885 }
1886
1887 return nelems;
1888}
1889
431b2a20
JR
1890/*
1891 * The exported map_sg function for dma_ops (handles scatter-gather
1892 * lists).
1893 */
65b050ad 1894static int map_sg(struct device *dev, struct scatterlist *sglist,
160c1d8e
FT
1895 int nelems, enum dma_data_direction dir,
1896 struct dma_attrs *attrs)
65b050ad
JR
1897{
1898 unsigned long flags;
65b050ad 1899 struct protection_domain *domain;
65b050ad
JR
1900 int i;
1901 struct scatterlist *s;
1902 phys_addr_t paddr;
1903 int mapped_elems = 0;
832a90c3 1904 u64 dma_mask;
65b050ad 1905
d03f067a
JR
1906 INC_STATS_COUNTER(cnt_map_sg);
1907
94f6d190
JR
1908 domain = get_domain(dev);
1909 if (PTR_ERR(domain) == -EINVAL)
f99c0f1c 1910 return map_sg_no_iommu(dev, sglist, nelems, dir);
94f6d190
JR
1911 else if (IS_ERR(domain))
1912 return 0;
dbcc112e 1913
832a90c3 1914 dma_mask = *dev->dma_mask;
65b050ad 1915
65b050ad
JR
1916 spin_lock_irqsave(&domain->lock, flags);
1917
1918 for_each_sg(sglist, s, nelems, i) {
1919 paddr = sg_phys(s);
1920
cd8c82e8 1921 s->dma_address = __map_single(dev, domain->priv,
832a90c3
JR
1922 paddr, s->length, dir, false,
1923 dma_mask);
65b050ad
JR
1924
1925 if (s->dma_address) {
1926 s->dma_length = s->length;
1927 mapped_elems++;
1928 } else
1929 goto unmap;
65b050ad
JR
1930 }
1931
0518a3a4 1932 iommu_flush_complete(domain);
65b050ad
JR
1933
1934out:
1935 spin_unlock_irqrestore(&domain->lock, flags);
1936
1937 return mapped_elems;
1938unmap:
1939 for_each_sg(sglist, s, mapped_elems, i) {
1940 if (s->dma_address)
cd8c82e8 1941 __unmap_single(domain->priv, s->dma_address,
65b050ad
JR
1942 s->dma_length, dir);
1943 s->dma_address = s->dma_length = 0;
1944 }
1945
1946 mapped_elems = 0;
1947
1948 goto out;
1949}
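/*
 * Illustrative sketch (not part of this file): roughly how a driver would
 * reach the map_sg()/unmap_sg() callbacks above via the generic DMA API.
 * The scatterlist setup, buffers and names are hypothetical.
 */
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>

static int example_map_two_buffers(struct device *dev,
				   void *buf1, void *buf2, size_t len)
{
	struct scatterlist sg[2];
	struct scatterlist *s;
	int i, nents;

	sg_init_table(sg, 2);
	sg_set_buf(&sg[0], buf1, len);
	sg_set_buf(&sg[1], buf2, len);

	/* Dispatches to map_sg() above */
	nents = dma_map_sg(dev, sg, 2, DMA_BIDIRECTIONAL);
	if (!nents)
		return -ENOMEM;

	for_each_sg(sg, s, nents, i)
		pr_debug("seg %d: bus addr 0x%llx len %u\n", i,
			 (unsigned long long)sg_dma_address(s),
			 sg_dma_len(s));

	/* Dispatches to unmap_sg() above */
	dma_unmap_sg(dev, sg, 2, DMA_BIDIRECTIONAL);

	return 0;
}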
1950
431b2a20
JR
1951/*
 1952 * The exported unmap_sg function for dma_ops (unmaps scatter-gather
 1953 * lists).
1954 */
65b050ad 1955static void unmap_sg(struct device *dev, struct scatterlist *sglist,
160c1d8e
FT
1956 int nelems, enum dma_data_direction dir,
1957 struct dma_attrs *attrs)
65b050ad
JR
1958{
1959 unsigned long flags;
65b050ad
JR
1960 struct protection_domain *domain;
1961 struct scatterlist *s;
65b050ad
JR
1962 int i;
1963
55877a6b
JR
1964 INC_STATS_COUNTER(cnt_unmap_sg);
1965
94f6d190
JR
1966 domain = get_domain(dev);
1967 if (IS_ERR(domain))
5b28df6f
JR
1968 return;
1969
65b050ad
JR
1970 spin_lock_irqsave(&domain->lock, flags);
1971
1972 for_each_sg(sglist, s, nelems, i) {
cd8c82e8 1973 __unmap_single(domain->priv, s->dma_address,
65b050ad 1974 s->dma_length, dir);
65b050ad
JR
1975 s->dma_address = s->dma_length = 0;
1976 }
1977
0518a3a4 1978 iommu_flush_complete(domain);
65b050ad
JR
1979
1980 spin_unlock_irqrestore(&domain->lock, flags);
1981}
1982
431b2a20
JR
1983/*
1984 * The exported alloc_coherent function for dma_ops.
1985 */
5d8b53cf
JR
1986static void *alloc_coherent(struct device *dev, size_t size,
1987 dma_addr_t *dma_addr, gfp_t flag)
1988{
1989 unsigned long flags;
1990 void *virt_addr;
5d8b53cf 1991 struct protection_domain *domain;
5d8b53cf 1992 phys_addr_t paddr;
832a90c3 1993 u64 dma_mask = dev->coherent_dma_mask;
5d8b53cf 1994
c8f0fb36
JR
1995 INC_STATS_COUNTER(cnt_alloc_coherent);
1996
94f6d190
JR
1997 domain = get_domain(dev);
1998 if (PTR_ERR(domain) == -EINVAL) {
f99c0f1c
JR
1999 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2000 *dma_addr = __pa(virt_addr);
2001 return virt_addr;
94f6d190
JR
2002 } else if (IS_ERR(domain))
2003 return NULL;
5d8b53cf 2004
f99c0f1c
JR
2005 dma_mask = dev->coherent_dma_mask;
2006 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2007 flag |= __GFP_ZERO;
5d8b53cf
JR
2008
2009 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2010 if (!virt_addr)
b25ae679 2011 return NULL;
5d8b53cf 2012
5d8b53cf
JR
2013 paddr = virt_to_phys(virt_addr);
2014
832a90c3
JR
2015 if (!dma_mask)
2016 dma_mask = *dev->dma_mask;
2017
5d8b53cf
JR
2018 spin_lock_irqsave(&domain->lock, flags);
2019
cd8c82e8 2020 *dma_addr = __map_single(dev, domain->priv, paddr,
832a90c3 2021 size, DMA_BIDIRECTIONAL, true, dma_mask);
5d8b53cf 2022
8fd524b3 2023 if (*dma_addr == DMA_ERROR_CODE) {
367d04c4 2024 spin_unlock_irqrestore(&domain->lock, flags);
5b28df6f 2025 goto out_free;
367d04c4 2026 }
5d8b53cf 2027
0518a3a4 2028 iommu_flush_complete(domain);
5d8b53cf 2029
5d8b53cf
JR
2030 spin_unlock_irqrestore(&domain->lock, flags);
2031
2032 return virt_addr;
5b28df6f
JR
2033
2034out_free:
2035
2036 free_pages((unsigned long)virt_addr, get_order(size));
2037
2038 return NULL;
5d8b53cf
JR
2039}
2040
431b2a20
JR
2041/*
2042 * The exported free_coherent function for dma_ops.
431b2a20 2043 */
5d8b53cf
JR
2044static void free_coherent(struct device *dev, size_t size,
2045 void *virt_addr, dma_addr_t dma_addr)
2046{
2047 unsigned long flags;
5d8b53cf 2048 struct protection_domain *domain;
5d8b53cf 2049
5d31ee7e
JR
2050 INC_STATS_COUNTER(cnt_free_coherent);
2051
94f6d190
JR
2052 domain = get_domain(dev);
2053 if (IS_ERR(domain))
5b28df6f
JR
2054 goto free_mem;
2055
5d8b53cf
JR
2056 spin_lock_irqsave(&domain->lock, flags);
2057
cd8c82e8 2058 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
5d8b53cf 2059
0518a3a4 2060 iommu_flush_complete(domain);
5d8b53cf
JR
2061
2062 spin_unlock_irqrestore(&domain->lock, flags);
2063
2064free_mem:
2065 free_pages((unsigned long)virt_addr, get_order(size));
2066}
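/*
 * Illustrative sketch (not part of this file): how a driver's coherent
 * allocation reaches alloc_coherent()/free_coherent() above. The
 * descriptor-ring size and function name are made-up examples.
 */
#include <linux/dma-mapping.h>

static int example_alloc_ring(struct device *dev)
{
	dma_addr_t ring_dma;
	void *ring;

	/* Dispatches to alloc_coherent() above */
	ring = dma_alloc_coherent(dev, 4096, &ring_dma, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;

	/* ... hand 'ring_dma' to the device, use 'ring' from the CPU ... */

	/* Dispatches to free_coherent() above */
	dma_free_coherent(dev, 4096, ring, ring_dma);

	return 0;
}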
2067
b39ba6ad
JR
2068/*
2069 * This function is called by the DMA layer to find out if we can handle a
2070 * particular device. It is part of the dma_ops.
2071 */
2072static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2073{
420aef8a 2074 return check_device(dev);
b39ba6ad
JR
2075}
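/*
 * Illustrative sketch (not part of this file): dma_supported() above is
 * consulted when a driver sets its DMA masks, roughly like this
 * hypothetical probe-time snippet (declarations from <linux/pci.h> and
 * <linux/dma-mapping.h>):
 */
static int example_set_mask(struct pci_dev *pdev)
{
	/* Both calls end up asking amd_iommu_dma_supported() for this device */
	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) ||
	    pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
		return pci_set_dma_mask(pdev, DMA_BIT_MASK(32));

	return 0;
}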
2076
c432f3df 2077/*
431b2a20
JR
2078 * The function for pre-allocating protection domains.
2079 *
c432f3df
JR
 2080 * Once the driver core informs the DMA layer when a driver grabs a
 2081 * device, we won't need to preallocate the protection domains anymore.
 2082 * For now we have to.
2083 */
0e93dd88 2084static void prealloc_protection_domains(void)
c432f3df
JR
2085{
2086 struct pci_dev *dev = NULL;
2087 struct dma_ops_domain *dma_dom;
2088 struct amd_iommu *iommu;
98fc5a69 2089 u16 devid;
c432f3df
JR
2090
2091 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
98fc5a69
JR
2092
2093 /* Do we handle this device? */
2094 if (!check_device(&dev->dev))
c432f3df 2095 continue;
98fc5a69
JR
2096
2097 /* Is there already any domain for it? */
15898bbc 2098 if (domain_for_device(&dev->dev))
c432f3df 2099 continue;
98fc5a69
JR
2100
2101 devid = get_device_id(&dev->dev);
2102
c432f3df
JR
2103 iommu = amd_iommu_rlookup_table[devid];
2104 if (!iommu)
2105 continue;
d9cfed92 2106 dma_dom = dma_ops_domain_alloc(iommu);
c432f3df
JR
2107 if (!dma_dom)
2108 continue;
2109 init_unity_mappings_for_device(dma_dom, devid);
bd60b735
JR
2110 dma_dom->target_dev = devid;
2111
15898bbc 2112 attach_device(&dev->dev, &dma_dom->domain);
be831297 2113
bd60b735 2114 list_add_tail(&dma_dom->list, &iommu_pd_list);
c432f3df
JR
2115 }
2116}
2117
160c1d8e 2118static struct dma_map_ops amd_iommu_dma_ops = {
6631ee9d
JR
2119 .alloc_coherent = alloc_coherent,
2120 .free_coherent = free_coherent,
51491367
FT
2121 .map_page = map_page,
2122 .unmap_page = unmap_page,
6631ee9d
JR
2123 .map_sg = map_sg,
2124 .unmap_sg = unmap_sg,
b39ba6ad 2125 .dma_supported = amd_iommu_dma_supported,
6631ee9d
JR
2126};
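/*
 * Illustrative sketch (not part of this file): once the global 'dma_ops'
 * pointer is set to the table above, the arch DMA wrappers dispatch roughly
 * like this. This is simplified from the generic code of this kernel
 * generation and only meant to show how the callbacks get invoked.
 */
static inline dma_addr_t example_dma_map_page(struct device *dev,
					      struct page *page,
					      unsigned long offset,
					      size_t size,
					      enum dma_data_direction dir)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	/* For devices behind the AMD IOMMU this is map_page() above */
	return ops->map_page(dev, page, offset, size, dir, NULL);
}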
2127
431b2a20
JR
2128/*
 2129 * The function which hooks the AMD IOMMU driver into the dma_ops interface.
2130 */
6631ee9d
JR
2131int __init amd_iommu_init_dma_ops(void)
2132{
2133 struct amd_iommu *iommu;
6631ee9d
JR
2134 int ret;
2135
431b2a20
JR
2136 /*
2137 * first allocate a default protection domain for every IOMMU we
2138 * found in the system. Devices not assigned to any other
2139 * protection domain will be assigned to the default one.
2140 */
3bd22172 2141 for_each_iommu(iommu) {
d9cfed92 2142 iommu->default_dom = dma_ops_domain_alloc(iommu);
6631ee9d
JR
2143 if (iommu->default_dom == NULL)
2144 return -ENOMEM;
e2dc14a2 2145 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
6631ee9d
JR
2146 ret = iommu_init_unity_mappings(iommu);
2147 if (ret)
2148 goto free_domains;
2149 }
2150
431b2a20
JR
2151 /*
2152 * If device isolation is enabled, pre-allocate the protection
2153 * domains for each device.
2154 */
6631ee9d
JR
2155 if (amd_iommu_isolate)
2156 prealloc_protection_domains();
2157
2158 iommu_detected = 1;
75f1cdf1 2159 swiotlb = 0;
92af4e29 2160#ifdef CONFIG_GART_IOMMU
6631ee9d
JR
2161 gart_iommu_aperture_disabled = 1;
2162 gart_iommu_aperture = 0;
92af4e29 2163#endif
6631ee9d 2164
431b2a20 2165 /* Finally make the driver visible to the device drivers */
6631ee9d
JR
2166 dma_ops = &amd_iommu_dma_ops;
2167
26961efe 2168 register_iommu(&amd_iommu_ops);
26961efe 2169
e275a2a0
JR
2170 bus_register_notifier(&pci_bus_type, &device_nb);
2171
7f26508b
JR
2172 amd_iommu_stats_init();
2173
6631ee9d
JR
2174 return 0;
2175
2176free_domains:
2177
3bd22172 2178 for_each_iommu(iommu) {
6631ee9d
JR
2179 if (iommu->default_dom)
2180 dma_ops_domain_free(iommu->default_dom);
2181 }
2182
2183 return ret;
2184}
6d98cd80
JR
2185
2186/*****************************************************************************
2187 *
2188 * The following functions belong to the exported interface of AMD IOMMU
2189 *
2190 * This interface allows access to lower level functions of the IOMMU
 2191 * like protection domain handling and assignment of devices to domains
2192 * which is not possible with the dma_ops interface.
2193 *
2194 *****************************************************************************/
2195
6d98cd80
JR
2196static void cleanup_domain(struct protection_domain *domain)
2197{
2198 unsigned long flags;
2199 u16 devid;
2200
2201 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2202
2203 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
2204 if (amd_iommu_pd_table[devid] == domain)
15898bbc 2205 clear_dte_entry(devid);
6d98cd80
JR
2206
2207 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2208}
2209
2650815f
JR
2210static void protection_domain_free(struct protection_domain *domain)
2211{
2212 if (!domain)
2213 return;
2214
aeb26f55
JR
2215 del_domain_from_list(domain);
2216
2650815f
JR
2217 if (domain->id)
2218 domain_id_free(domain->id);
2219
2220 kfree(domain);
2221}
2222
2223static struct protection_domain *protection_domain_alloc(void)
c156e347
JR
2224{
2225 struct protection_domain *domain;
2226
2227 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2228 if (!domain)
2650815f 2229 return NULL;
c156e347
JR
2230
2231 spin_lock_init(&domain->lock);
c156e347
JR
2232 domain->id = domain_id_alloc();
2233 if (!domain->id)
2650815f
JR
2234 goto out_err;
2235
aeb26f55
JR
2236 add_domain_to_list(domain);
2237
2650815f
JR
2238 return domain;
2239
2240out_err:
2241 kfree(domain);
2242
2243 return NULL;
2244}
2245
2246static int amd_iommu_domain_init(struct iommu_domain *dom)
2247{
2248 struct protection_domain *domain;
2249
2250 domain = protection_domain_alloc();
2251 if (!domain)
c156e347 2252 goto out_free;
2650815f
JR
2253
2254 domain->mode = PAGE_MODE_3_LEVEL;
c156e347
JR
2255 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2256 if (!domain->pt_root)
2257 goto out_free;
2258
2259 dom->priv = domain;
2260
2261 return 0;
2262
2263out_free:
2650815f 2264 protection_domain_free(domain);
c156e347
JR
2265
2266 return -ENOMEM;
2267}
2268
98383fc3
JR
2269static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2270{
2271 struct protection_domain *domain = dom->priv;
2272
2273 if (!domain)
2274 return;
2275
2276 if (domain->dev_cnt > 0)
2277 cleanup_domain(domain);
2278
2279 BUG_ON(domain->dev_cnt != 0);
2280
2281 free_pagetable(domain);
2282
2283 domain_id_free(domain->id);
2284
2285 kfree(domain);
2286
2287 dom->priv = NULL;
2288}
2289
684f2888
JR
2290static void amd_iommu_detach_device(struct iommu_domain *dom,
2291 struct device *dev)
2292{
684f2888 2293 struct amd_iommu *iommu;
684f2888
JR
2294 u16 devid;
2295
98fc5a69 2296 if (!check_device(dev))
684f2888
JR
2297 return;
2298
98fc5a69 2299 devid = get_device_id(dev);
684f2888 2300
98fc5a69 2301 if (amd_iommu_pd_table[devid] != NULL)
15898bbc 2302 detach_device(dev);
684f2888
JR
2303
2304 iommu = amd_iommu_rlookup_table[devid];
2305 if (!iommu)
2306 return;
2307
2308 iommu_queue_inv_dev_entry(iommu, devid);
2309 iommu_completion_wait(iommu);
2310}
2311
01106066
JR
2312static int amd_iommu_attach_device(struct iommu_domain *dom,
2313 struct device *dev)
2314{
2315 struct protection_domain *domain = dom->priv;
2316 struct protection_domain *old_domain;
2317 struct amd_iommu *iommu;
15898bbc 2318 int ret;
01106066
JR
2319 u16 devid;
2320
98fc5a69 2321 if (!check_device(dev))
01106066
JR
2322 return -EINVAL;
2323
98fc5a69 2324 devid = get_device_id(dev);
01106066
JR
2325
2326 iommu = amd_iommu_rlookup_table[devid];
2327 if (!iommu)
2328 return -EINVAL;
2329
15898bbc 2330 old_domain = amd_iommu_pd_table[devid];
01106066 2331 if (old_domain)
15898bbc 2332 detach_device(dev);
01106066 2333
15898bbc 2334 ret = attach_device(dev, domain);
01106066
JR
2335
2336 iommu_completion_wait(iommu);
2337
15898bbc 2338 return ret;
01106066
JR
2339}
2340
c6229ca6
JR
2341static int amd_iommu_map_range(struct iommu_domain *dom,
2342 unsigned long iova, phys_addr_t paddr,
2343 size_t size, int iommu_prot)
2344{
2345 struct protection_domain *domain = dom->priv;
2346 unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
2347 int prot = 0;
2348 int ret;
2349
2350 if (iommu_prot & IOMMU_READ)
2351 prot |= IOMMU_PROT_IR;
2352 if (iommu_prot & IOMMU_WRITE)
2353 prot |= IOMMU_PROT_IW;
2354
2355 iova &= PAGE_MASK;
2356 paddr &= PAGE_MASK;
2357
2358 for (i = 0; i < npages; ++i) {
abdc5eb3 2359 ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
c6229ca6
JR
2360 if (ret)
2361 return ret;
2362
2363 iova += PAGE_SIZE;
2364 paddr += PAGE_SIZE;
2365 }
2366
2367 return 0;
2368}
2369
eb74ff6c
JR
2370static void amd_iommu_unmap_range(struct iommu_domain *dom,
2371 unsigned long iova, size_t size)
2372{
2373
2374 struct protection_domain *domain = dom->priv;
2375 unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
2376
2377 iova &= PAGE_MASK;
2378
2379 for (i = 0; i < npages; ++i) {
a6b256b4 2380 iommu_unmap_page(domain, iova, PM_MAP_4k);
eb74ff6c
JR
2381 iova += PAGE_SIZE;
2382 }
2383
601367d7 2384 iommu_flush_tlb_pde(domain);
eb74ff6c
JR
2385}
2386
645c4c8d
JR
2387static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2388 unsigned long iova)
2389{
2390 struct protection_domain *domain = dom->priv;
2391 unsigned long offset = iova & ~PAGE_MASK;
2392 phys_addr_t paddr;
2393 u64 *pte;
2394
a6b256b4 2395 pte = fetch_pte(domain, iova, PM_MAP_4k);
645c4c8d 2396
a6d41a40 2397 if (!pte || !IOMMU_PTE_PRESENT(*pte))
645c4c8d
JR
2398 return 0;
2399
2400 paddr = *pte & IOMMU_PAGE_MASK;
2401 paddr |= offset;
2402
2403 return paddr;
2404}
2405
dbb9fd86
SY
2406static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2407 unsigned long cap)
2408{
2409 return 0;
2410}
2411
26961efe
JR
2412static struct iommu_ops amd_iommu_ops = {
2413 .domain_init = amd_iommu_domain_init,
2414 .domain_destroy = amd_iommu_domain_destroy,
2415 .attach_dev = amd_iommu_attach_device,
2416 .detach_dev = amd_iommu_detach_device,
2417 .map = amd_iommu_map_range,
2418 .unmap = amd_iommu_unmap_range,
2419 .iova_to_phys = amd_iommu_iova_to_phys,
dbb9fd86 2420 .domain_has_cap = amd_iommu_domain_has_cap,
26961efe
JR
2421};
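/*
 * Illustrative sketch (not part of this file): how a user of the generic
 * iommu-api (e.g. device-assignment code) would exercise the callbacks
 * registered above. The wrapper names assume the iommu-api of this kernel
 * generation; the device, addresses and sizes are made up.
 */
#include <linux/iommu.h>

static int example_assign_device(struct device *dev)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();		/* -> amd_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, dev);	/* -> amd_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* Map 2 MB of memory at IOVA 0 -> amd_iommu_map_range() */
	ret = iommu_map_range(dom, 0, 0x40000000ULL, 2 * 1024 * 1024,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* ... the device can now DMA to IOVAs 0 .. 2MB-1 ... */

	iommu_unmap_range(dom, 0, 2 * 1024 * 1024); /* -> amd_iommu_unmap_range() */
out_detach:
	iommu_detach_device(dom, dev);		/* -> amd_iommu_detach_device() */
out_free:
	iommu_domain_free(dom);			/* -> amd_iommu_domain_destroy() */
	return ret;
}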
2422
0feae533
JR
2423/*****************************************************************************
2424 *
 2425 * The next functions do a basic initialization of the IOMMU for passthrough
2426 * mode
2427 *
2428 * In passthrough mode the IOMMU is initialized and enabled but not used for
2429 * DMA-API translation.
2430 *
2431 *****************************************************************************/
2432
2433int __init amd_iommu_init_passthrough(void)
2434{
15898bbc 2435 struct amd_iommu *iommu;
0feae533 2436 struct pci_dev *dev = NULL;
15898bbc 2437 u16 devid;
0feae533
JR
2438
 2439 /* allocate passthrough domain */
2440 pt_domain = protection_domain_alloc();
2441 if (!pt_domain)
2442 return -ENOMEM;
2443
2444 pt_domain->mode |= PAGE_MODE_NONE;
2445
2446 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
0feae533 2447
98fc5a69 2448 if (!check_device(&dev->dev))
0feae533
JR
2449 continue;
2450
98fc5a69
JR
2451 devid = get_device_id(&dev->dev);
2452
15898bbc 2453 iommu = amd_iommu_rlookup_table[devid];
0feae533
JR
2454 if (!iommu)
2455 continue;
2456
15898bbc 2457 attach_device(&dev->dev, pt_domain);
0feae533
JR
2458 }
2459
2460 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2461
2462 return 0;
2463}