intel-iommu: move struct dmar_domain def out dma_remapping.h
drivers/pci/intel-iommu.c
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE VTD_PAGE_SIZE
43 #define CONTEXT_SIZE VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START (0xfee00000)
49 #define IOAPIC_RANGE_END (0xfeefffff)
50 #define IOVA_START_ADDR (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
59
60 /*
61 * 0: Present
62 * 1-11: Reserved
63 * 12-63: Context Ptr (12 - (haw-1))
64 * 64-127: Reserved
65 */
66 struct root_entry {
67 u64 val;
68 u64 rsvd1;
69 };
70 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
71 static inline bool root_present(struct root_entry *root)
72 {
73 return (root->val & 1);
74 }
75 static inline void set_root_present(struct root_entry *root)
76 {
77 root->val |= 1;
78 }
79 static inline void set_root_value(struct root_entry *root, unsigned long value)
80 {
81 root->val |= value & VTD_PAGE_MASK;
82 }
83
84 static inline struct context_entry *
85 get_context_addr_from_root(struct root_entry *root)
86 {
87 return (struct context_entry *)
88 (root_present(root)?phys_to_virt(
89 root->val & VTD_PAGE_MASK) :
90 NULL);
91 }
92
93 /*
94 * low 64 bits:
95 * 0: present
96 * 1: fault processing disable
97 * 2-3: translation type
98 * 12-63: address space root
99 * high 64 bits:
100 * 0-2: address width
101 * 3-6: avail
102 * 8-23: domain id
103 */
104 struct context_entry {
105 u64 lo;
106 u64 hi;
107 };
108 #define context_present(c) ((c).lo & 1)
109 #define context_fault_disable(c) (((c).lo >> 1) & 1)
110 #define context_translation_type(c) (((c).lo >> 2) & 3)
111 #define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
112 #define context_address_width(c) ((c).hi & 7)
113 #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
114
115 #define context_set_present(c) do {(c).lo |= 1;} while (0)
116 #define context_set_fault_enable(c) \
117 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
118 #define context_set_translation_type(c, val) \
119 do { \
120 (c).lo &= (((u64)-1) << 4) | 3; \
121 (c).lo |= ((val) & 3) << 2; \
122 } while (0)
123 #define CONTEXT_TT_MULTI_LEVEL 0
124 #define context_set_address_root(c, val) \
125 do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
126 #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
127 #define context_set_domain_id(c, val) \
128 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
129 #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
130
131 /*
132 * 0: readable
133 * 1: writable
134 * 2-6: reserved
135 * 7: super page
136 * 8-11: available
137 * 12-63: Host physical address
138 */
139 struct dma_pte {
140 u64 val;
141 };
142 #define dma_clear_pte(p) do {(p).val = 0;} while (0)
143
144 #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
145 #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
146 #define dma_set_pte_prot(p, prot) \
147 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
148 #define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
149 #define dma_set_pte_addr(p, addr) do {\
150 (p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
151 #define dma_pte_present(p) (((p).val & 3) != 0)
152
153 struct dmar_domain {
154 int id; /* domain id */
155 struct intel_iommu *iommu; /* back pointer to owning iommu */
156
157 struct list_head devices; /* all devices' list */
158 struct iova_domain iovad; /* iova's that belong to this domain */
159
160 struct dma_pte *pgd; /* virtual address */
161 spinlock_t mapping_lock; /* page table lock */
162 int gaw; /* max guest address width */
163
164 /* adjusted guest address width, 0 is level 2 30-bit */
165 int agaw;
166
167 #define DOMAIN_FLAG_MULTIPLE_DEVICES 1
168 int flags;
169 };
170
171 static void flush_unmaps_timeout(unsigned long data);
172
173 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
174
175 #define HIGH_WATER_MARK 250
176 struct deferred_flush_tables {
177 int next;
178 struct iova *iova[HIGH_WATER_MARK];
179 struct dmar_domain *domain[HIGH_WATER_MARK];
180 };
181
182 static struct deferred_flush_tables *deferred_flush;
183
184 /* number of IOMMUs in the system; sizes the per-IOMMU tables below */
185 static int g_num_of_iommus;
186
187 static DEFINE_SPINLOCK(async_umap_flush_lock);
188 static LIST_HEAD(unmaps_to_do);
189
190 static int timer_on;
191 static long list_size;
192
193 static void domain_remove_dev_info(struct dmar_domain *domain);
194
195 int dmar_disabled;
196 static int __initdata dmar_map_gfx = 1;
197 static int dmar_forcedac;
198 static int intel_iommu_strict;
199
200 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
201 static DEFINE_SPINLOCK(device_domain_lock);
202 static LIST_HEAD(device_domain_list);
203
204 static int __init intel_iommu_setup(char *str)
205 {
206 if (!str)
207 return -EINVAL;
208 while (*str) {
209 if (!strncmp(str, "off", 3)) {
210 dmar_disabled = 1;
211 printk(KERN_INFO"Intel-IOMMU: disabled\n");
212 } else if (!strncmp(str, "igfx_off", 8)) {
213 dmar_map_gfx = 0;
214 printk(KERN_INFO
215 "Intel-IOMMU: disable GFX device mapping\n");
216 } else if (!strncmp(str, "forcedac", 8)) {
217 printk(KERN_INFO
218 "Intel-IOMMU: Forcing DAC for PCI devices\n");
219 dmar_forcedac = 1;
220 } else if (!strncmp(str, "strict", 6)) {
221 printk(KERN_INFO
222 "Intel-IOMMU: disable batched IOTLB flush\n");
223 intel_iommu_strict = 1;
224 }
225
226 str += strcspn(str, ",");
227 while (*str == ',')
228 str++;
229 }
230 return 0;
231 }
232 __setup("intel_iommu=", intel_iommu_setup);
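/*
 * For example, the options parsed above can be combined on the kernel
 * command line, separated by commas:
 *
 *	intel_iommu=off
 *	intel_iommu=igfx_off,forcedac
 *	intel_iommu=strict
 */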
233
234 static struct kmem_cache *iommu_domain_cache;
235 static struct kmem_cache *iommu_devinfo_cache;
236 static struct kmem_cache *iommu_iova_cache;
237
238 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
239 {
240 unsigned int flags;
241 void *vaddr;
242
243 /* trying to avoid low memory issues */
244 flags = current->flags & PF_MEMALLOC;
245 current->flags |= PF_MEMALLOC;
246 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
247 current->flags &= (~PF_MEMALLOC | flags);
248 return vaddr;
249 }
250
251
252 static inline void *alloc_pgtable_page(void)
253 {
254 unsigned int flags;
255 void *vaddr;
256
257 /* trying to avoid low memory issues */
258 flags = current->flags & PF_MEMALLOC;
259 current->flags |= PF_MEMALLOC;
260 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
261 current->flags &= (~PF_MEMALLOC | flags);
262 return vaddr;
263 }
264
265 static inline void free_pgtable_page(void *vaddr)
266 {
267 free_page((unsigned long)vaddr);
268 }
269
270 static inline void *alloc_domain_mem(void)
271 {
272 return iommu_kmem_cache_alloc(iommu_domain_cache);
273 }
274
275 static void free_domain_mem(void *vaddr)
276 {
277 kmem_cache_free(iommu_domain_cache, vaddr);
278 }
279
280 static inline void * alloc_devinfo_mem(void)
281 {
282 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
283 }
284
285 static inline void free_devinfo_mem(void *vaddr)
286 {
287 kmem_cache_free(iommu_devinfo_cache, vaddr);
288 }
289
290 struct iova *alloc_iova_mem(void)
291 {
292 return iommu_kmem_cache_alloc(iommu_iova_cache);
293 }
294
295 void free_iova_mem(struct iova *iova)
296 {
297 kmem_cache_free(iommu_iova_cache, iova);
298 }
299
300 /* Gets context entry for a given bus and devfn */
301 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
302 u8 bus, u8 devfn)
303 {
304 struct root_entry *root;
305 struct context_entry *context;
306 unsigned long phy_addr;
307 unsigned long flags;
308
309 spin_lock_irqsave(&iommu->lock, flags);
310 root = &iommu->root_entry[bus];
311 context = get_context_addr_from_root(root);
312 if (!context) {
313 context = (struct context_entry *)alloc_pgtable_page();
314 if (!context) {
315 spin_unlock_irqrestore(&iommu->lock, flags);
316 return NULL;
317 }
318 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
319 phy_addr = virt_to_phys((void *)context);
320 set_root_value(root, phy_addr);
321 set_root_present(root);
322 __iommu_flush_cache(iommu, root, sizeof(*root));
323 }
324 spin_unlock_irqrestore(&iommu->lock, flags);
325 return &context[devfn];
326 }
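/*
 * Example: looking up device 1a:00.1 (bus 0x1a, devfn 0x01) reads
 * root_entry[0x1a], allocating the 4KB context table for that bus on
 * first use, and returns &context[0x01].  Root and context entries are
 * 16 bytes each, so one page holds the 256 entries covering a full bus.
 */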
327
328 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
329 {
330 struct root_entry *root;
331 struct context_entry *context;
332 int ret;
333 unsigned long flags;
334
335 spin_lock_irqsave(&iommu->lock, flags);
336 root = &iommu->root_entry[bus];
337 context = get_context_addr_from_root(root);
338 if (!context) {
339 ret = 0;
340 goto out;
341 }
342 ret = context_present(context[devfn]);
343 out:
344 spin_unlock_irqrestore(&iommu->lock, flags);
345 return ret;
346 }
347
348 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
349 {
350 struct root_entry *root;
351 struct context_entry *context;
352 unsigned long flags;
353
354 spin_lock_irqsave(&iommu->lock, flags);
355 root = &iommu->root_entry[bus];
356 context = get_context_addr_from_root(root);
357 if (context) {
358 context_clear_entry(context[devfn]);
359 __iommu_flush_cache(iommu, &context[devfn], \
360 sizeof(*context));
361 }
362 spin_unlock_irqrestore(&iommu->lock, flags);
363 }
364
365 static void free_context_table(struct intel_iommu *iommu)
366 {
367 struct root_entry *root;
368 int i;
369 unsigned long flags;
370 struct context_entry *context;
371
372 spin_lock_irqsave(&iommu->lock, flags);
373 if (!iommu->root_entry) {
374 goto out;
375 }
376 for (i = 0; i < ROOT_ENTRY_NR; i++) {
377 root = &iommu->root_entry[i];
378 context = get_context_addr_from_root(root);
379 if (context)
380 free_pgtable_page(context);
381 }
382 free_pgtable_page(iommu->root_entry);
383 iommu->root_entry = NULL;
384 out:
385 spin_unlock_irqrestore(&iommu->lock, flags);
386 }
387
388 /* page table handling */
389 #define LEVEL_STRIDE (9)
390 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
391
392 static inline int agaw_to_level(int agaw)
393 {
394 return agaw + 2;
395 }
396
397 static inline int agaw_to_width(int agaw)
398 {
399 return 30 + agaw * LEVEL_STRIDE;
400
401 }
402
403 static inline int width_to_agaw(int width)
404 {
405 return (width - 30) / LEVEL_STRIDE;
406 }
407
408 static inline unsigned int level_to_offset_bits(int level)
409 {
410 return (12 + (level - 1) * LEVEL_STRIDE);
411 }
412
413 static inline int address_level_offset(u64 addr, int level)
414 {
415 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
416 }
417
418 static inline u64 level_mask(int level)
419 {
420 return ((u64)-1 << level_to_offset_bits(level));
421 }
422
423 static inline u64 level_size(int level)
424 {
425 return ((u64)1 << level_to_offset_bits(level));
426 }
427
428 static inline u64 align_to_level(u64 addr, int level)
429 {
430 return ((addr + level_size(level) - 1) & level_mask(level));
431 }
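/*
 * Worked example: a domain with agaw == 2 uses a 48-bit, 4-level page
 * table (agaw_to_width(2) == 48, agaw_to_level(2) == 4).  Each level
 * consumes LEVEL_STRIDE == 9 address bits, so address_level_offset()
 * selects bits 39-47 at level 4, 30-38 at level 3, 21-29 at level 2 and
 * 12-20 at level 1, with a 4KB page at the leaf.
 */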
432
433 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
434 {
435 int addr_width = agaw_to_width(domain->agaw);
436 struct dma_pte *parent, *pte = NULL;
437 int level = agaw_to_level(domain->agaw);
438 int offset;
439 unsigned long flags;
440
441 BUG_ON(!domain->pgd);
442
443 addr &= (((u64)1) << addr_width) - 1;
444 parent = domain->pgd;
445
446 spin_lock_irqsave(&domain->mapping_lock, flags);
447 while (level > 0) {
448 void *tmp_page;
449
450 offset = address_level_offset(addr, level);
451 pte = &parent[offset];
452 if (level == 1)
453 break;
454
455 if (!dma_pte_present(*pte)) {
456 tmp_page = alloc_pgtable_page();
457
458 if (!tmp_page) {
459 spin_unlock_irqrestore(&domain->mapping_lock,
460 flags);
461 return NULL;
462 }
463 __iommu_flush_cache(domain->iommu, tmp_page,
464 PAGE_SIZE);
465 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
466 /*
467 * high level table always sets r/w, last level page
468 * table control read/write
469 */
470 dma_set_pte_readable(*pte);
471 dma_set_pte_writable(*pte);
472 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
473 }
474 parent = phys_to_virt(dma_pte_addr(*pte));
475 level--;
476 }
477
478 spin_unlock_irqrestore(&domain->mapping_lock, flags);
479 return pte;
480 }
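/*
 * Note: the walk above allocates missing intermediate directories on
 * demand and marks them read/write; only the returned level-1 PTE
 * carries the caller's real permissions (see domain_page_mapping()).
 */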
481
482 /* return address's pte at specific level */
483 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
484 int level)
485 {
486 struct dma_pte *parent, *pte = NULL;
487 int total = agaw_to_level(domain->agaw);
488 int offset;
489
490 parent = domain->pgd;
491 while (level <= total) {
492 offset = address_level_offset(addr, total);
493 pte = &parent[offset];
494 if (level == total)
495 return pte;
496
497 if (!dma_pte_present(*pte))
498 break;
499 parent = phys_to_virt(dma_pte_addr(*pte));
500 total--;
501 }
502 return NULL;
503 }
504
505 /* clear one page's page table */
506 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
507 {
508 struct dma_pte *pte = NULL;
509
510 /* get last level pte */
511 pte = dma_addr_level_pte(domain, addr, 1);
512
513 if (pte) {
514 dma_clear_pte(*pte);
515 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
516 }
517 }
518
519 /* clear last level pte, a tlb flush should be followed */
520 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
521 {
522 int addr_width = agaw_to_width(domain->agaw);
523
524 start &= (((u64)1) << addr_width) - 1;
525 end &= (((u64)1) << addr_width) - 1;
526 /* in case it's a partial page */
527 start = PAGE_ALIGN(start);
528 end &= PAGE_MASK;
529
530 /* we don't need a lock here; nobody else touches this iova range */
531 while (start < end) {
532 dma_pte_clear_one(domain, start);
533 start += VTD_PAGE_SIZE;
534 }
535 }
536
537 /* free page table pages. last level pte should already be cleared */
538 static void dma_pte_free_pagetable(struct dmar_domain *domain,
539 u64 start, u64 end)
540 {
541 int addr_width = agaw_to_width(domain->agaw);
542 struct dma_pte *pte;
543 int total = agaw_to_level(domain->agaw);
544 int level;
545 u64 tmp;
546
547 start &= (((u64)1) << addr_width) - 1;
548 end &= (((u64)1) << addr_width) - 1;
549
550 /* we don't need a lock here; nobody else touches this iova range */
551 level = 2;
552 while (level <= total) {
553 tmp = align_to_level(start, level);
554 if (tmp >= end || (tmp + level_size(level) > end))
555 return;
556
557 while (tmp < end) {
558 pte = dma_addr_level_pte(domain, tmp, level);
559 if (pte) {
560 free_pgtable_page(
561 phys_to_virt(dma_pte_addr(*pte)));
562 dma_clear_pte(*pte);
563 __iommu_flush_cache(domain->iommu,
564 pte, sizeof(*pte));
565 }
566 tmp += level_size(level);
567 }
568 level++;
569 }
570 /* free pgd */
571 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
572 free_pgtable_page(domain->pgd);
573 domain->pgd = NULL;
574 }
575 }
576
577 /* iommu handling */
578 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
579 {
580 struct root_entry *root;
581 unsigned long flags;
582
583 root = (struct root_entry *)alloc_pgtable_page();
584 if (!root)
585 return -ENOMEM;
586
587 __iommu_flush_cache(iommu, root, ROOT_SIZE);
588
589 spin_lock_irqsave(&iommu->lock, flags);
590 iommu->root_entry = root;
591 spin_unlock_irqrestore(&iommu->lock, flags);
592
593 return 0;
594 }
595
596 static void iommu_set_root_entry(struct intel_iommu *iommu)
597 {
598 void *addr;
599 u32 cmd, sts;
600 unsigned long flag;
601
602 addr = iommu->root_entry;
603
604 spin_lock_irqsave(&iommu->register_lock, flag);
605 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
606
607 cmd = iommu->gcmd | DMA_GCMD_SRTP;
608 writel(cmd, iommu->reg + DMAR_GCMD_REG);
609
610 /* Make sure hardware completes it */
611 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
612 readl, (sts & DMA_GSTS_RTPS), sts);
613
614 spin_unlock_irqrestore(&iommu->register_lock, flag);
615 }
616
617 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
618 {
619 u32 val;
620 unsigned long flag;
621
622 if (!cap_rwbf(iommu->cap))
623 return;
624 val = iommu->gcmd | DMA_GCMD_WBF;
625
626 spin_lock_irqsave(&iommu->register_lock, flag);
627 writel(val, iommu->reg + DMAR_GCMD_REG);
628
629 /* Make sure hardware completes it */
630 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
631 readl, (!(val & DMA_GSTS_WBFS)), val);
632
633 spin_unlock_irqrestore(&iommu->register_lock, flag);
634 }
635
636 /* return value determines whether we need a write buffer flush */
637 static int __iommu_flush_context(struct intel_iommu *iommu,
638 u16 did, u16 source_id, u8 function_mask, u64 type,
639 int non_present_entry_flush)
640 {
641 u64 val = 0;
642 unsigned long flag;
643
644 /*
645 * In the non-present entry flush case: if the hardware doesn't cache
646 * non-present entries we do nothing, and if it does cache them we
647 * flush the entries of domain 0 (that domain id is used to tag any
648 * cached non-present entries)
649 */
650 if (non_present_entry_flush) {
651 if (!cap_caching_mode(iommu->cap))
652 return 1;
653 else
654 did = 0;
655 }
656
657 switch (type) {
658 case DMA_CCMD_GLOBAL_INVL:
659 val = DMA_CCMD_GLOBAL_INVL;
660 break;
661 case DMA_CCMD_DOMAIN_INVL:
662 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
663 break;
664 case DMA_CCMD_DEVICE_INVL:
665 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
666 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
667 break;
668 default:
669 BUG();
670 }
671 val |= DMA_CCMD_ICC;
672
673 spin_lock_irqsave(&iommu->register_lock, flag);
674 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
675
676 /* Make sure hardware completes it */
677 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
678 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
679
680 spin_unlock_irqrestore(&iommu->register_lock, flag);
681
682 /* flush context entry will implicitly flush write buffer */
683 return 0;
684 }
685
686 /* return value determines whether we need a write buffer flush */
687 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
688 u64 addr, unsigned int size_order, u64 type,
689 int non_present_entry_flush)
690 {
691 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
692 u64 val = 0, val_iva = 0;
693 unsigned long flag;
694
695 /*
696 * In the non-present entry flush case: if the hardware doesn't cache
697 * non-present entries we do nothing, and if it does cache them we
698 * flush the entries of domain 0 (that domain id is used to tag any
699 * cached non-present entries)
700 */
701 if (non_present_entry_flush) {
702 if (!cap_caching_mode(iommu->cap))
703 return 1;
704 else
705 did = 0;
706 }
707
708 switch (type) {
709 case DMA_TLB_GLOBAL_FLUSH:
710 /* a global flush doesn't need to set IVA_REG */
711 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
712 break;
713 case DMA_TLB_DSI_FLUSH:
714 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
715 break;
716 case DMA_TLB_PSI_FLUSH:
717 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
718 /* Note: always flush non-leaf currently */
719 val_iva = size_order | addr;
720 break;
721 default:
722 BUG();
723 }
724 /* Note: set drain read/write */
725 #if 0
726 /*
727 * This is probably just to be extra safe; it looks like we can
728 * ignore it without any impact.
729 */
730 if (cap_read_drain(iommu->cap))
731 val |= DMA_TLB_READ_DRAIN;
732 #endif
733 if (cap_write_drain(iommu->cap))
734 val |= DMA_TLB_WRITE_DRAIN;
735
736 spin_lock_irqsave(&iommu->register_lock, flag);
737 /* Note: Only uses first TLB reg currently */
738 if (val_iva)
739 dmar_writeq(iommu->reg + tlb_offset, val_iva);
740 dmar_writeq(iommu->reg + tlb_offset + 8, val);
741
742 /* Make sure hardware completes it */
743 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
744 dmar_readq, (!(val & DMA_TLB_IVT)), val);
745
746 spin_unlock_irqrestore(&iommu->register_lock, flag);
747
748 /* check IOTLB invalidation granularity */
749 if (DMA_TLB_IAIG(val) == 0)
750 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
751 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
752 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
753 (unsigned long long)DMA_TLB_IIRG(type),
754 (unsigned long long)DMA_TLB_IAIG(val));
755 /* flush iotlb entry will implicitly flush write buffer */
756 return 0;
757 }
758
759 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
760 u64 addr, unsigned int pages, int non_present_entry_flush)
761 {
762 unsigned int mask;
763
764 BUG_ON(addr & (~VTD_PAGE_MASK));
765 BUG_ON(pages == 0);
766
767 /* Fallback to domain selective flush if no PSI support */
768 if (!cap_pgsel_inv(iommu->cap))
769 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
770 DMA_TLB_DSI_FLUSH,
771 non_present_entry_flush);
772
773 /*
774 * PSI requires the number of pages to be a power of two, and the base
775 * address to be naturally aligned to that size
776 */
777 mask = ilog2(__roundup_pow_of_two(pages));
778 /* Fallback to domain selective flush if size is too big */
779 if (mask > cap_max_amask_val(iommu->cap))
780 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
781 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
782
783 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
784 DMA_TLB_PSI_FLUSH,
785 non_present_entry_flush);
786 }
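/*
 * Example: a request to flush 9 pages is rounded up to 16, giving
 * mask == 4, i.e. a page-selective invalidation of a naturally aligned
 * 16-page (64KB) region.  If 4 exceeded cap_max_amask_val() we would
 * fall back to the domain-selective flush instead.
 */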
787
788 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
789 {
790 u32 pmen;
791 unsigned long flags;
792
793 spin_lock_irqsave(&iommu->register_lock, flags);
794 pmen = readl(iommu->reg + DMAR_PMEN_REG);
795 pmen &= ~DMA_PMEN_EPM;
796 writel(pmen, iommu->reg + DMAR_PMEN_REG);
797
798 /* wait for the protected region status bit to clear */
799 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
800 readl, !(pmen & DMA_PMEN_PRS), pmen);
801
802 spin_unlock_irqrestore(&iommu->register_lock, flags);
803 }
804
805 static int iommu_enable_translation(struct intel_iommu *iommu)
806 {
807 u32 sts;
808 unsigned long flags;
809
810 spin_lock_irqsave(&iommu->register_lock, flags);
811 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
812
813 /* Make sure hardware completes it */
814 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
815 readl, (sts & DMA_GSTS_TES), sts);
816
817 iommu->gcmd |= DMA_GCMD_TE;
818 spin_unlock_irqrestore(&iommu->register_lock, flags);
819 return 0;
820 }
821
822 static int iommu_disable_translation(struct intel_iommu *iommu)
823 {
824 u32 sts;
825 unsigned long flag;
826
827 spin_lock_irqsave(&iommu->register_lock, flag);
828 iommu->gcmd &= ~DMA_GCMD_TE;
829 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
830
831 /* Make sure hardware completes it */
832 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
833 readl, (!(sts & DMA_GSTS_TES)), sts);
834
835 spin_unlock_irqrestore(&iommu->register_lock, flag);
836 return 0;
837 }
838
839 /* iommu interrupt handling. Most of it is MSI-like. */
840
841 static const char *fault_reason_strings[] =
842 {
843 "Software",
844 "Present bit in root entry is clear",
845 "Present bit in context entry is clear",
846 "Invalid context entry",
847 "Access beyond MGAW",
848 "PTE Write access is not set",
849 "PTE Read access is not set",
850 "Next page table ptr is invalid",
851 "Root table address invalid",
852 "Context table ptr is invalid",
853 "non-zero reserved fields in RTP",
854 "non-zero reserved fields in CTP",
855 "non-zero reserved fields in PTE",
856 };
857 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
858
859 const char *dmar_get_fault_reason(u8 fault_reason)
860 {
861 if (fault_reason > MAX_FAULT_REASON_IDX)
862 return "Unknown";
863 else
864 return fault_reason_strings[fault_reason];
865 }
866
867 void dmar_msi_unmask(unsigned int irq)
868 {
869 struct intel_iommu *iommu = get_irq_data(irq);
870 unsigned long flag;
871
872 /* unmask it */
873 spin_lock_irqsave(&iommu->register_lock, flag);
874 writel(0, iommu->reg + DMAR_FECTL_REG);
875 /* Read back a register to flush the posted write */
876 readl(iommu->reg + DMAR_FECTL_REG);
877 spin_unlock_irqrestore(&iommu->register_lock, flag);
878 }
879
880 void dmar_msi_mask(unsigned int irq)
881 {
882 unsigned long flag;
883 struct intel_iommu *iommu = get_irq_data(irq);
884
885 /* mask it */
886 spin_lock_irqsave(&iommu->register_lock, flag);
887 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
888 /* Read back a register to flush the posted write */
889 readl(iommu->reg + DMAR_FECTL_REG);
890 spin_unlock_irqrestore(&iommu->register_lock, flag);
891 }
892
893 void dmar_msi_write(int irq, struct msi_msg *msg)
894 {
895 struct intel_iommu *iommu = get_irq_data(irq);
896 unsigned long flag;
897
898 spin_lock_irqsave(&iommu->register_lock, flag);
899 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
900 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
901 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
902 spin_unlock_irqrestore(&iommu->register_lock, flag);
903 }
904
905 void dmar_msi_read(int irq, struct msi_msg *msg)
906 {
907 struct intel_iommu *iommu = get_irq_data(irq);
908 unsigned long flag;
909
910 spin_lock_irqsave(&iommu->register_lock, flag);
911 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
912 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
913 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
914 spin_unlock_irqrestore(&iommu->register_lock, flag);
915 }
916
917 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
918 u8 fault_reason, u16 source_id, unsigned long long addr)
919 {
920 const char *reason;
921
922 reason = dmar_get_fault_reason(fault_reason);
923
924 printk(KERN_ERR
925 "DMAR:[%s] Request device [%02x:%02x.%d] "
926 "fault addr %llx \n"
927 "DMAR:[fault reason %02d] %s\n",
928 (type ? "DMA Read" : "DMA Write"),
929 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
930 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
931 return 0;
932 }
933
934 #define PRIMARY_FAULT_REG_LEN (16)
935 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
936 {
937 struct intel_iommu *iommu = dev_id;
938 int reg, fault_index;
939 u32 fault_status;
940 unsigned long flag;
941
942 spin_lock_irqsave(&iommu->register_lock, flag);
943 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
944
945 /* TBD: ignore advanced fault log currently */
946 if (!(fault_status & DMA_FSTS_PPF))
947 goto clear_overflow;
948
949 fault_index = dma_fsts_fault_record_index(fault_status);
950 reg = cap_fault_reg_offset(iommu->cap);
951 while (1) {
952 u8 fault_reason;
953 u16 source_id;
954 u64 guest_addr;
955 int type;
956 u32 data;
957
958 /* highest 32 bits */
959 data = readl(iommu->reg + reg +
960 fault_index * PRIMARY_FAULT_REG_LEN + 12);
961 if (!(data & DMA_FRCD_F))
962 break;
963
964 fault_reason = dma_frcd_fault_reason(data);
965 type = dma_frcd_type(data);
966
967 data = readl(iommu->reg + reg +
968 fault_index * PRIMARY_FAULT_REG_LEN + 8);
969 source_id = dma_frcd_source_id(data);
970
971 guest_addr = dmar_readq(iommu->reg + reg +
972 fault_index * PRIMARY_FAULT_REG_LEN);
973 guest_addr = dma_frcd_page_addr(guest_addr);
974 /* clear the fault */
975 writel(DMA_FRCD_F, iommu->reg + reg +
976 fault_index * PRIMARY_FAULT_REG_LEN + 12);
977
978 spin_unlock_irqrestore(&iommu->register_lock, flag);
979
980 iommu_page_fault_do_one(iommu, type, fault_reason,
981 source_id, guest_addr);
982
983 fault_index++;
984 if (fault_index > cap_num_fault_regs(iommu->cap))
985 fault_index = 0;
986 spin_lock_irqsave(&iommu->register_lock, flag);
987 }
988 clear_overflow:
989 /* clear primary fault overflow */
990 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
991 if (fault_status & DMA_FSTS_PFO)
992 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
993
994 spin_unlock_irqrestore(&iommu->register_lock, flag);
995 return IRQ_HANDLED;
996 }
997
998 int dmar_set_interrupt(struct intel_iommu *iommu)
999 {
1000 int irq, ret;
1001
1002 irq = create_irq();
1003 if (!irq) {
1004 printk(KERN_ERR "IOMMU: no free vectors\n");
1005 return -EINVAL;
1006 }
1007
1008 set_irq_data(irq, iommu);
1009 iommu->irq = irq;
1010
1011 ret = arch_setup_dmar_msi(irq);
1012 if (ret) {
1013 set_irq_data(irq, NULL);
1014 iommu->irq = 0;
1015 destroy_irq(irq);
1016 return 0;
1017 }
1018
1019 /* Clear any faults already pending before requesting the irq */
1020 iommu_page_fault(irq, iommu);
1021
1022 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1023 if (ret)
1024 printk(KERN_ERR "IOMMU: can't request irq\n");
1025 return ret;
1026 }
1027
1028 static int iommu_init_domains(struct intel_iommu *iommu)
1029 {
1030 unsigned long ndomains;
1031 unsigned long nlongs;
1032
1033 ndomains = cap_ndoms(iommu->cap);
1034 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
1035 nlongs = BITS_TO_LONGS(ndomains);
1036
1037 /* TBD: there might be 64K domains,
1038 * consider a different allocation scheme for future chips
1039 */
1040 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1041 if (!iommu->domain_ids) {
1042 printk(KERN_ERR "Allocating domain id array failed\n");
1043 return -ENOMEM;
1044 }
1045 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1046 GFP_KERNEL);
1047 if (!iommu->domains) {
1048 printk(KERN_ERR "Allocating domain array failed\n");
1049 kfree(iommu->domain_ids);
1050 return -ENOMEM;
1051 }
1052
1053 spin_lock_init(&iommu->lock);
1054
1055 /*
1056 * if Caching mode is set, then invalid translations are tagged
1057 * with domain id 0. Hence we need to pre-allocate it.
1058 */
1059 if (cap_caching_mode(iommu->cap))
1060 set_bit(0, iommu->domain_ids);
1061 return 0;
1062 }
1063
1064
1065 static void domain_exit(struct dmar_domain *domain);
1066
1067 void free_dmar_iommu(struct intel_iommu *iommu)
1068 {
1069 struct dmar_domain *domain;
1070 int i;
1071
1072 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1073 for (; i < cap_ndoms(iommu->cap); ) {
1074 domain = iommu->domains[i];
1075 clear_bit(i, iommu->domain_ids);
1076 domain_exit(domain);
1077 i = find_next_bit(iommu->domain_ids,
1078 cap_ndoms(iommu->cap), i+1);
1079 }
1080
1081 if (iommu->gcmd & DMA_GCMD_TE)
1082 iommu_disable_translation(iommu);
1083
1084 if (iommu->irq) {
1085 set_irq_data(iommu->irq, NULL);
1086 /* This will mask the irq */
1087 free_irq(iommu->irq, iommu);
1088 destroy_irq(iommu->irq);
1089 }
1090
1091 kfree(iommu->domains);
1092 kfree(iommu->domain_ids);
1093
1094 /* free context mapping */
1095 free_context_table(iommu);
1096 }
1097
1098 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1099 {
1100 unsigned long num;
1101 unsigned long ndomains;
1102 struct dmar_domain *domain;
1103 unsigned long flags;
1104
1105 domain = alloc_domain_mem();
1106 if (!domain)
1107 return NULL;
1108
1109 ndomains = cap_ndoms(iommu->cap);
1110
1111 spin_lock_irqsave(&iommu->lock, flags);
1112 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1113 if (num >= ndomains) {
1114 spin_unlock_irqrestore(&iommu->lock, flags);
1115 free_domain_mem(domain);
1116 printk(KERN_ERR "IOMMU: no free domain ids\n");
1117 return NULL;
1118 }
1119
1120 set_bit(num, iommu->domain_ids);
1121 domain->id = num;
1122 domain->iommu = iommu;
1123 iommu->domains[num] = domain;
1124 spin_unlock_irqrestore(&iommu->lock, flags);
1125
1126 return domain;
1127 }
1128
1129 static void iommu_free_domain(struct dmar_domain *domain)
1130 {
1131 unsigned long flags;
1132
1133 spin_lock_irqsave(&domain->iommu->lock, flags);
1134 clear_bit(domain->id, domain->iommu->domain_ids);
1135 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1136 }
1137
1138 static struct iova_domain reserved_iova_list;
1139 static struct lock_class_key reserved_alloc_key;
1140 static struct lock_class_key reserved_rbtree_key;
1141
1142 static void dmar_init_reserved_ranges(void)
1143 {
1144 struct pci_dev *pdev = NULL;
1145 struct iova *iova;
1146 int i;
1147 u64 addr, size;
1148
1149 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1150
1151 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1152 &reserved_alloc_key);
1153 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1154 &reserved_rbtree_key);
1155
1156 /* IOAPIC ranges shouldn't be accessed by DMA */
1157 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1158 IOVA_PFN(IOAPIC_RANGE_END));
1159 if (!iova)
1160 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1161
1162 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1163 for_each_pci_dev(pdev) {
1164 struct resource *r;
1165
1166 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1167 r = &pdev->resource[i];
1168 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1169 continue;
1170 addr = r->start;
1171 addr &= PAGE_MASK;
1172 size = r->end - addr;
1173 size = PAGE_ALIGN(size);
1174 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1175 IOVA_PFN(size + addr) - 1);
1176 if (!iova)
1177 printk(KERN_ERR "Reserve iova failed\n");
1178 }
1179 }
1180
1181 }
1182
1183 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1184 {
1185 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1186 }
1187
1188 static inline int guestwidth_to_adjustwidth(int gaw)
1189 {
1190 int agaw;
1191 int r = (gaw - 12) % 9;
1192
1193 if (r == 0)
1194 agaw = gaw;
1195 else
1196 agaw = gaw + 9 - r;
1197 if (agaw > 64)
1198 agaw = 64;
1199 return agaw;
1200 }
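/*
 * Example: a guest width of 48 bits is already a multiple of 9 bits
 * above the 12-bit page offset, so it is returned unchanged; 32 bits is
 * rounded up to 39.  domain_init() then feeds the result to
 * width_to_agaw(), e.g. width_to_agaw(39) == 1, i.e. a 3-level table.
 */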
1201
1202 static int domain_init(struct dmar_domain *domain, int guest_width)
1203 {
1204 struct intel_iommu *iommu;
1205 int adjust_width, agaw;
1206 unsigned long sagaw;
1207
1208 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1209 spin_lock_init(&domain->mapping_lock);
1210
1211 domain_reserve_special_ranges(domain);
1212
1213 /* calculate AGAW */
1214 iommu = domain->iommu;
1215 if (guest_width > cap_mgaw(iommu->cap))
1216 guest_width = cap_mgaw(iommu->cap);
1217 domain->gaw = guest_width;
1218 adjust_width = guestwidth_to_adjustwidth(guest_width);
1219 agaw = width_to_agaw(adjust_width);
1220 sagaw = cap_sagaw(iommu->cap);
1221 if (!test_bit(agaw, &sagaw)) {
1222 /* hardware doesn't support it, choose a bigger one */
1223 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1224 agaw = find_next_bit(&sagaw, 5, agaw);
1225 if (agaw >= 5)
1226 return -ENODEV;
1227 }
1228 domain->agaw = agaw;
1229 INIT_LIST_HEAD(&domain->devices);
1230
1231 /* always allocate the top pgd */
1232 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1233 if (!domain->pgd)
1234 return -ENOMEM;
1235 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1236 return 0;
1237 }
1238
1239 static void domain_exit(struct dmar_domain *domain)
1240 {
1241 u64 end;
1242
1243 /* Domain 0 is reserved, so don't process it */
1244 if (!domain)
1245 return;
1246
1247 domain_remove_dev_info(domain);
1248 /* destroy iovas */
1249 put_iova_domain(&domain->iovad);
1250 end = DOMAIN_MAX_ADDR(domain->gaw);
1251 end = end & PAGE_MASK;
1252
1253 /* clear ptes */
1254 dma_pte_clear_range(domain, 0, end);
1255
1256 /* free page tables */
1257 dma_pte_free_pagetable(domain, 0, end);
1258
1259 iommu_free_domain(domain);
1260 free_domain_mem(domain);
1261 }
1262
1263 static int domain_context_mapping_one(struct dmar_domain *domain,
1264 u8 bus, u8 devfn)
1265 {
1266 struct context_entry *context;
1267 struct intel_iommu *iommu = domain->iommu;
1268 unsigned long flags;
1269
1270 pr_debug("Set context mapping for %02x:%02x.%d\n",
1271 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1272 BUG_ON(!domain->pgd);
1273 context = device_to_context_entry(iommu, bus, devfn);
1274 if (!context)
1275 return -ENOMEM;
1276 spin_lock_irqsave(&iommu->lock, flags);
1277 if (context_present(*context)) {
1278 spin_unlock_irqrestore(&iommu->lock, flags);
1279 return 0;
1280 }
1281
1282 context_set_domain_id(*context, domain->id);
1283 context_set_address_width(*context, domain->agaw);
1284 context_set_address_root(*context, virt_to_phys(domain->pgd));
1285 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1286 context_set_fault_enable(*context);
1287 context_set_present(*context);
1288 __iommu_flush_cache(iommu, context, sizeof(*context));
1289
1290 /* it's a non-present to present mapping */
1291 if (iommu->flush.flush_context(iommu, domain->id,
1292 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1293 DMA_CCMD_DEVICE_INVL, 1))
1294 iommu_flush_write_buffer(iommu);
1295 else
1296 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1297
1298 spin_unlock_irqrestore(&iommu->lock, flags);
1299 return 0;
1300 }
1301
1302 static int
1303 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1304 {
1305 int ret;
1306 struct pci_dev *tmp, *parent;
1307
1308 ret = domain_context_mapping_one(domain, pdev->bus->number,
1309 pdev->devfn);
1310 if (ret)
1311 return ret;
1312
1313 /* dependent device mapping */
1314 tmp = pci_find_upstream_pcie_bridge(pdev);
1315 if (!tmp)
1316 return 0;
1317 /* Secondary interface's bus number and devfn 0 */
1318 parent = pdev->bus->self;
1319 while (parent != tmp) {
1320 ret = domain_context_mapping_one(domain, parent->bus->number,
1321 parent->devfn);
1322 if (ret)
1323 return ret;
1324 parent = parent->bus->self;
1325 }
1326 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1327 return domain_context_mapping_one(domain,
1328 tmp->subordinate->number, 0);
1329 else /* this is a legacy PCI bridge */
1330 return domain_context_mapping_one(domain,
1331 tmp->bus->number, tmp->devfn);
1332 }
1333
1334 static int domain_context_mapped(struct dmar_domain *domain,
1335 struct pci_dev *pdev)
1336 {
1337 int ret;
1338 struct pci_dev *tmp, *parent;
1339
1340 ret = device_context_mapped(domain->iommu,
1341 pdev->bus->number, pdev->devfn);
1342 if (!ret)
1343 return ret;
1344 /* dependent device mapping */
1345 tmp = pci_find_upstream_pcie_bridge(pdev);
1346 if (!tmp)
1347 return ret;
1348 /* Secondary interface's bus number and devfn 0 */
1349 parent = pdev->bus->self;
1350 while (parent != tmp) {
1351 ret = device_context_mapped(domain->iommu, parent->bus->number,
1352 parent->devfn);
1353 if (!ret)
1354 return ret;
1355 parent = parent->bus->self;
1356 }
1357 if (tmp->is_pcie)
1358 return device_context_mapped(domain->iommu,
1359 tmp->subordinate->number, 0);
1360 else
1361 return device_context_mapped(domain->iommu,
1362 tmp->bus->number, tmp->devfn);
1363 }
1364
1365 static int
1366 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1367 u64 hpa, size_t size, int prot)
1368 {
1369 u64 start_pfn, end_pfn;
1370 struct dma_pte *pte;
1371 int index;
1372 int addr_width = agaw_to_width(domain->agaw);
1373
1374 hpa &= (((u64)1) << addr_width) - 1;
1375
1376 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1377 return -EINVAL;
1378 iova &= PAGE_MASK;
1379 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1380 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1381 index = 0;
1382 while (start_pfn < end_pfn) {
1383 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1384 if (!pte)
1385 return -ENOMEM;
1386 /* We don't need a lock here; nobody else
1387 * touches this iova range
1388 */
1389 BUG_ON(dma_pte_addr(*pte));
1390 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1391 dma_set_pte_prot(*pte, prot);
1392 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1393 start_pfn++;
1394 index++;
1395 }
1396 return 0;
1397 }
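/*
 * Example: with page-aligned iova == 0x400000 and hpa == 0x12345000, a
 * size of 0x2100 bytes spans three 4KB pages, so three leaf PTEs are
 * written (iova pages 0x400-0x402 -> host pages 0x12345-0x12347), each
 * followed by a cache flush so the IOMMU sees the new entry.
 */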
1398
1399 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1400 {
1401 clear_context_table(domain->iommu, bus, devfn);
1402 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1403 DMA_CCMD_GLOBAL_INVL, 0);
1404 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1405 DMA_TLB_GLOBAL_FLUSH, 0);
1406 }
1407
1408 static void domain_remove_dev_info(struct dmar_domain *domain)
1409 {
1410 struct device_domain_info *info;
1411 unsigned long flags;
1412
1413 spin_lock_irqsave(&device_domain_lock, flags);
1414 while (!list_empty(&domain->devices)) {
1415 info = list_entry(domain->devices.next,
1416 struct device_domain_info, link);
1417 list_del(&info->link);
1418 list_del(&info->global);
1419 if (info->dev)
1420 info->dev->dev.archdata.iommu = NULL;
1421 spin_unlock_irqrestore(&device_domain_lock, flags);
1422
1423 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1424 free_devinfo_mem(info);
1425
1426 spin_lock_irqsave(&device_domain_lock, flags);
1427 }
1428 spin_unlock_irqrestore(&device_domain_lock, flags);
1429 }
1430
1431 /*
1432 * find_domain
1433 * Note: struct pci_dev->dev.archdata.iommu stores the device's domain info
1434 */
1435 static struct dmar_domain *
1436 find_domain(struct pci_dev *pdev)
1437 {
1438 struct device_domain_info *info;
1439
1440 /* No lock here, assumes no domain exit in normal case */
1441 info = pdev->dev.archdata.iommu;
1442 if (info)
1443 return info->domain;
1444 return NULL;
1445 }
1446
1447 /* domain is initialized */
1448 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1449 {
1450 struct dmar_domain *domain, *found = NULL;
1451 struct intel_iommu *iommu;
1452 struct dmar_drhd_unit *drhd;
1453 struct device_domain_info *info, *tmp;
1454 struct pci_dev *dev_tmp;
1455 unsigned long flags;
1456 int bus = 0, devfn = 0;
1457
1458 domain = find_domain(pdev);
1459 if (domain)
1460 return domain;
1461
1462 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1463 if (dev_tmp) {
1464 if (dev_tmp->is_pcie) {
1465 bus = dev_tmp->subordinate->number;
1466 devfn = 0;
1467 } else {
1468 bus = dev_tmp->bus->number;
1469 devfn = dev_tmp->devfn;
1470 }
1471 spin_lock_irqsave(&device_domain_lock, flags);
1472 list_for_each_entry(info, &device_domain_list, global) {
1473 if (info->bus == bus && info->devfn == devfn) {
1474 found = info->domain;
1475 break;
1476 }
1477 }
1478 spin_unlock_irqrestore(&device_domain_lock, flags);
1479 /* pcie-pci bridge already has a domain, use it */
1480 if (found) {
1481 domain = found;
1482 goto found_domain;
1483 }
1484 }
1485
1486 /* Allocate new domain for the device */
1487 drhd = dmar_find_matched_drhd_unit(pdev);
1488 if (!drhd) {
1489 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1490 pci_name(pdev));
1491 return NULL;
1492 }
1493 iommu = drhd->iommu;
1494
1495 domain = iommu_alloc_domain(iommu);
1496 if (!domain)
1497 goto error;
1498
1499 if (domain_init(domain, gaw)) {
1500 domain_exit(domain);
1501 goto error;
1502 }
1503
1504 /* register pcie-to-pci device */
1505 if (dev_tmp) {
1506 info = alloc_devinfo_mem();
1507 if (!info) {
1508 domain_exit(domain);
1509 goto error;
1510 }
1511 info->bus = bus;
1512 info->devfn = devfn;
1513 info->dev = NULL;
1514 info->domain = domain;
1515 /* This domain is shared by devices under p2p bridge */
1516 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1517
1518 /* pcie-to-pci bridge already has a domain, use it */
1519 found = NULL;
1520 spin_lock_irqsave(&device_domain_lock, flags);
1521 list_for_each_entry(tmp, &device_domain_list, global) {
1522 if (tmp->bus == bus && tmp->devfn == devfn) {
1523 found = tmp->domain;
1524 break;
1525 }
1526 }
1527 if (found) {
1528 free_devinfo_mem(info);
1529 domain_exit(domain);
1530 domain = found;
1531 } else {
1532 list_add(&info->link, &domain->devices);
1533 list_add(&info->global, &device_domain_list);
1534 }
1535 spin_unlock_irqrestore(&device_domain_lock, flags);
1536 }
1537
1538 found_domain:
1539 info = alloc_devinfo_mem();
1540 if (!info)
1541 goto error;
1542 info->bus = pdev->bus->number;
1543 info->devfn = pdev->devfn;
1544 info->dev = pdev;
1545 info->domain = domain;
1546 spin_lock_irqsave(&device_domain_lock, flags);
1547 /* somebody is fast */
1548 found = find_domain(pdev);
1549 if (found != NULL) {
1550 spin_unlock_irqrestore(&device_domain_lock, flags);
1551 if (found != domain) {
1552 domain_exit(domain);
1553 domain = found;
1554 }
1555 free_devinfo_mem(info);
1556 return domain;
1557 }
1558 list_add(&info->link, &domain->devices);
1559 list_add(&info->global, &device_domain_list);
1560 pdev->dev.archdata.iommu = info;
1561 spin_unlock_irqrestore(&device_domain_lock, flags);
1562 return domain;
1563 error:
1564 /* recheck it here, maybe others set it */
1565 return find_domain(pdev);
1566 }
1567
1568 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1569 unsigned long long start,
1570 unsigned long long end)
1571 {
1572 struct dmar_domain *domain;
1573 unsigned long size;
1574 unsigned long long base;
1575 int ret;
1576
1577 printk(KERN_INFO
1578 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1579 pci_name(pdev), start, end);
1580 /* page table init */
1581 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1582 if (!domain)
1583 return -ENOMEM;
1584
1585 /* The address might not be aligned */
1586 base = start & PAGE_MASK;
1587 size = end - base;
1588 size = PAGE_ALIGN(size);
1589 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1590 IOVA_PFN(base + size) - 1)) {
1591 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1592 ret = -ENOMEM;
1593 goto error;
1594 }
1595
1596 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1597 size, base, pci_name(pdev));
1598 /*
1599 * RMRR range might have overlap with physical memory range,
1600 * clear it first
1601 */
1602 dma_pte_clear_range(domain, base, base + size);
1603
1604 ret = domain_page_mapping(domain, base, base, size,
1605 DMA_PTE_READ|DMA_PTE_WRITE);
1606 if (ret)
1607 goto error;
1608
1609 /* context entry init */
1610 ret = domain_context_mapping(domain, pdev);
1611 if (!ret)
1612 return 0;
1613 error:
1614 domain_exit(domain);
1615 return ret;
1616
1617 }
1618
1619 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1620 struct pci_dev *pdev)
1621 {
1622 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1623 return 0;
1624 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1625 rmrr->end_address + 1);
1626 }
1627
1628 #ifdef CONFIG_DMAR_GFX_WA
1629 struct iommu_prepare_data {
1630 struct pci_dev *pdev;
1631 int ret;
1632 };
1633
1634 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1635 unsigned long end_pfn, void *datax)
1636 {
1637 struct iommu_prepare_data *data;
1638
1639 data = (struct iommu_prepare_data *)datax;
1640
1641 data->ret = iommu_prepare_identity_map(data->pdev,
1642 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1643 return data->ret;
1644
1645 }
1646
1647 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1648 {
1649 int nid;
1650 struct iommu_prepare_data data;
1651
1652 data.pdev = pdev;
1653 data.ret = 0;
1654
1655 for_each_online_node(nid) {
1656 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1657 if (data.ret)
1658 return data.ret;
1659 }
1660 return data.ret;
1661 }
1662
1663 static void __init iommu_prepare_gfx_mapping(void)
1664 {
1665 struct pci_dev *pdev = NULL;
1666 int ret;
1667
1668 for_each_pci_dev(pdev) {
1669 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1670 !IS_GFX_DEVICE(pdev))
1671 continue;
1672 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1673 pci_name(pdev));
1674 ret = iommu_prepare_with_active_regions(pdev);
1675 if (ret)
1676 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1677 }
1678 }
1679 #endif
1680
1681 #ifdef CONFIG_DMAR_FLOPPY_WA
1682 static inline void iommu_prepare_isa(void)
1683 {
1684 struct pci_dev *pdev;
1685 int ret;
1686
1687 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1688 if (!pdev)
1689 return;
1690
1691 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1692 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1693
1694 if (ret)
1695 printk("IOMMU: Failed to create 0-64M identity map, "
1696 "floppy might not work\n");
1697
1698 }
1699 #else
1700 static inline void iommu_prepare_isa(void)
1701 {
1702 return;
1703 }
1704 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1705
1706 static int __init init_dmars(void)
1707 {
1708 struct dmar_drhd_unit *drhd;
1709 struct dmar_rmrr_unit *rmrr;
1710 struct pci_dev *pdev;
1711 struct intel_iommu *iommu;
1712 int i, ret, unit = 0;
1713
1714 /*
1715 * for each drhd
1716 * allocate root
1717 * initialize and program root entry to not present
1718 * endfor
1719 */
1720 for_each_drhd_unit(drhd) {
1721 g_num_of_iommus++;
1722 /*
1723 * no lock needed: this is only incremented in the single-threaded
1724 * kernel __init code path; all other accesses are
1725 * read-only
1726 */
1727 }
1728
1729 deferred_flush = kzalloc(g_num_of_iommus *
1730 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1731 if (!deferred_flush) {
1732 ret = -ENOMEM;
1733 goto error;
1734 }
1735
1736 for_each_drhd_unit(drhd) {
1737 if (drhd->ignored)
1738 continue;
1739
1740 iommu = drhd->iommu;
1741
1742 ret = iommu_init_domains(iommu);
1743 if (ret)
1744 goto error;
1745
1746 /*
1747 * TBD:
1748 * we could share the same root & context tables
1749 * among all IOMMUs. Need to split it later.
1750 */
1751 ret = iommu_alloc_root_entry(iommu);
1752 if (ret) {
1753 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1754 goto error;
1755 }
1756 }
1757
1758 for_each_drhd_unit(drhd) {
1759 if (drhd->ignored)
1760 continue;
1761
1762 iommu = drhd->iommu;
1763 if (dmar_enable_qi(iommu)) {
1764 /*
1765 * Queued Invalidation not enabled; use Register-Based
1766 * Invalidation
1767 */
1768 iommu->flush.flush_context = __iommu_flush_context;
1769 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1770 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1771 "invalidation\n",
1772 (unsigned long long)drhd->reg_base_addr);
1773 } else {
1774 iommu->flush.flush_context = qi_flush_context;
1775 iommu->flush.flush_iotlb = qi_flush_iotlb;
1776 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1777 "invalidation\n",
1778 (unsigned long long)drhd->reg_base_addr);
1779 }
1780 }
1781
1782 /*
1783 * For each rmrr
1784 * for each dev attached to rmrr
1785 * do
1786 * locate drhd for dev, alloc domain for dev
1787 * allocate free domain
1788 * allocate page table entries for rmrr
1789 * if context not allocated for bus
1790 * allocate and init context
1791 * set present in root table for this bus
1792 * init context with domain, translation etc
1793 * endfor
1794 * endfor
1795 */
1796 for_each_rmrr_units(rmrr) {
1797 for (i = 0; i < rmrr->devices_cnt; i++) {
1798 pdev = rmrr->devices[i];
1799 /* some BIOSes list non-existent devices in the DMAR table */
1800 if (!pdev)
1801 continue;
1802 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1803 if (ret)
1804 printk(KERN_ERR
1805 "IOMMU: mapping reserved region failed\n");
1806 }
1807 }
1808
1809 iommu_prepare_gfx_mapping();
1810
1811 iommu_prepare_isa();
1812
1813 /*
1814 * for each drhd
1815 * enable fault log
1816 * global invalidate context cache
1817 * global invalidate iotlb
1818 * enable translation
1819 */
1820 for_each_drhd_unit(drhd) {
1821 if (drhd->ignored)
1822 continue;
1823 iommu = drhd->iommu;
1824 sprintf (iommu->name, "dmar%d", unit++);
1825
1826 iommu_flush_write_buffer(iommu);
1827
1828 ret = dmar_set_interrupt(iommu);
1829 if (ret)
1830 goto error;
1831
1832 iommu_set_root_entry(iommu);
1833
1834 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1835 0);
1836 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1837 0);
1838 iommu_disable_protect_mem_regions(iommu);
1839
1840 ret = iommu_enable_translation(iommu);
1841 if (ret)
1842 goto error;
1843 }
1844
1845 return 0;
1846 error:
1847 for_each_drhd_unit(drhd) {
1848 if (drhd->ignored)
1849 continue;
1850 iommu = drhd->iommu;
1851 free_iommu(iommu);
1852 }
1853 return ret;
1854 }
1855
1856 static inline u64 aligned_size(u64 host_addr, size_t size)
1857 {
1858 u64 addr;
1859 addr = (host_addr & (~PAGE_MASK)) + size;
1860 return PAGE_ALIGN(addr);
1861 }
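/*
 * Example: aligned_size(0x1234, 0x100) == 0x1000 (one page), while
 * aligned_size(0x1f00, 0x200) == 0x2000 because the buffer straddles a
 * page boundary; only the page offset of host_addr matters, not its
 * absolute value.
 */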
1862
1863 struct iova *
1864 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1865 {
1866 struct iova *piova;
1867
1868 /* Make sure it's in range */
1869 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1870 if (!size || (IOVA_START_ADDR + size > end))
1871 return NULL;
1872
1873 piova = alloc_iova(&domain->iovad,
1874 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1875 return piova;
1876 }
1877
1878 static struct iova *
1879 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1880 size_t size, u64 dma_mask)
1881 {
1882 struct pci_dev *pdev = to_pci_dev(dev);
1883 struct iova *iova = NULL;
1884
1885 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1886 iova = iommu_alloc_iova(domain, size, dma_mask);
1887 else {
1888 /*
1889 * First try to allocate an I/O virtual address in
1890 * the DMA_32BIT_MASK range and, if that fails, try
1891 * allocating from the higher range
1892 */
1893 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1894 if (!iova)
1895 iova = iommu_alloc_iova(domain, size, dma_mask);
1896 }
1897
1898 if (!iova) {
1899 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1900 return NULL;
1901 }
1902
1903 return iova;
1904 }
1905
1906 static struct dmar_domain *
1907 get_valid_domain_for_dev(struct pci_dev *pdev)
1908 {
1909 struct dmar_domain *domain;
1910 int ret;
1911
1912 domain = get_domain_for_dev(pdev,
1913 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1914 if (!domain) {
1915 printk(KERN_ERR
1916 "Allocating domain for %s failed", pci_name(pdev));
1917 return NULL;
1918 }
1919
1920 /* make sure context mapping is ok */
1921 if (unlikely(!domain_context_mapped(domain, pdev))) {
1922 ret = domain_context_mapping(domain, pdev);
1923 if (ret) {
1924 printk(KERN_ERR
1925 "Domain context map for %s failed",
1926 pci_name(pdev));
1927 return NULL;
1928 }
1929 }
1930
1931 return domain;
1932 }
1933
1934 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1935 size_t size, int dir, u64 dma_mask)
1936 {
1937 struct pci_dev *pdev = to_pci_dev(hwdev);
1938 struct dmar_domain *domain;
1939 phys_addr_t start_paddr;
1940 struct iova *iova;
1941 int prot = 0;
1942 int ret;
1943
1944 BUG_ON(dir == DMA_NONE);
1945 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1946 return paddr;
1947
1948 domain = get_valid_domain_for_dev(pdev);
1949 if (!domain)
1950 return 0;
1951
1952 size = aligned_size((u64)paddr, size);
1953
1954 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1955 if (!iova)
1956 goto error;
1957
1958 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
1959
1960 /*
1961 * Check if DMAR supports zero-length reads on write-only
1962 * mappings.
1963 */
1964 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1965 !cap_zlr(domain->iommu->cap))
1966 prot |= DMA_PTE_READ;
1967 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1968 prot |= DMA_PTE_WRITE;
1969 /*
1970 * paddr through paddr + size may cover partial pages, so we map whole
1971 * pages. Note: if two parts of one page are mapped separately, we
1972 * might end up with two IOVAs pointing at the same host page, but this
1973 * is not a big problem
1974 */
1975 ret = domain_page_mapping(domain, start_paddr,
1976 ((u64)paddr) & PAGE_MASK, size, prot);
1977 if (ret)
1978 goto error;
1979
1980 /* it's a non-present to present mapping */
1981 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1982 start_paddr, size >> VTD_PAGE_SHIFT, 1);
1983 if (ret)
1984 iommu_flush_write_buffer(domain->iommu);
1985
1986 return start_paddr + ((u64)paddr & (~PAGE_MASK));
1987
1988 error:
1989 if (iova)
1990 __free_iova(&domain->iovad, iova);
1991 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1992 pci_name(pdev), size, (unsigned long long)paddr, dir);
1993 return 0;
1994 }
1995
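/*
 * A minimal usage sketch (illustrative only; 'pdev', 'buf' and 'len' are
 * placeholders, and real drivers normally go through the generic DMA API
 * rather than calling these helpers directly):
 */
#if 0
	dma_addr_t dma;

	/* map one buffer for device-to-memory DMA, honouring the device mask */
	dma = __intel_map_single(&pdev->dev, virt_to_phys(buf), len,
				 DMA_FROM_DEVICE, pdev->dma_mask);
	if (!dma)
		return -ENOMEM;
	/* ... program the device with 'dma', wait for the transfer ... */
	intel_unmap_single(&pdev->dev, dma, len, DMA_FROM_DEVICE);
#endif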
1996 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1997 size_t size, int dir)
1998 {
1999 return __intel_map_single(hwdev, paddr, size, dir,
2000 to_pci_dev(hwdev)->dma_mask);
2001 }
2002
2003 static void flush_unmaps(void)
2004 {
2005 int i, j;
2006
2007 timer_on = 0;
2008
2009 /* just flush them all */
2010 for (i = 0; i < g_num_of_iommus; i++) {
2011 if (deferred_flush[i].next) {
2012 struct intel_iommu *iommu =
2013 deferred_flush[i].domain[0]->iommu;
2014
2015 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2016 DMA_TLB_GLOBAL_FLUSH, 0);
2017 for (j = 0; j < deferred_flush[i].next; j++) {
2018 __free_iova(&deferred_flush[i].domain[j]->iovad,
2019 deferred_flush[i].iova[j]);
2020 }
2021 deferred_flush[i].next = 0;
2022 }
2023 }
2024
2025 list_size = 0;
2026 }
2027
2028 static void flush_unmaps_timeout(unsigned long data)
2029 {
2030 unsigned long flags;
2031
2032 spin_lock_irqsave(&async_umap_flush_lock, flags);
2033 flush_unmaps();
2034 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2035 }
2036
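/*
 * Queue an IOVA for deferred release on its IOMMU's per-unit list.  The
 * queue is drained immediately once list_size reaches HIGH_WATER_MARK;
 * otherwise the unmap_timer is armed to batch the flush about 10ms later.
 */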
2037 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2038 {
2039 unsigned long flags;
2040 int next, iommu_id;
2041
2042 spin_lock_irqsave(&async_umap_flush_lock, flags);
2043 if (list_size == HIGH_WATER_MARK)
2044 flush_unmaps();
2045
2046 iommu_id = dom->iommu->seq_id;
2047
2048 next = deferred_flush[iommu_id].next;
2049 deferred_flush[iommu_id].domain[next] = dom;
2050 deferred_flush[iommu_id].iova[next] = iova;
2051 deferred_flush[iommu_id].next++;
2052
2053 if (!timer_on) {
2054 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2055 timer_on = 1;
2056 }
2057 list_size++;
2058 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2059 }
2060
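/*
 * Tear down a mapping created by __intel_map_single(): clear the PTEs and
 * free the page-table pages covering the aligned range.  In strict mode
 * the IOTLB is flushed and the IOVA freed right away; otherwise the IOVA
 * is handed to add_unmap() for batched release.
 */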
2061 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2062 int dir)
2063 {
2064 struct pci_dev *pdev = to_pci_dev(dev);
2065 struct dmar_domain *domain;
2066 unsigned long start_addr;
2067 struct iova *iova;
2068
2069 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2070 return;
2071 domain = find_domain(pdev);
2072 BUG_ON(!domain);
2073
2074 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2075 if (!iova)
2076 return;
2077
2078 start_addr = iova->pfn_lo << PAGE_SHIFT;
2079 size = aligned_size((u64)dev_addr, size);
2080
2081 pr_debug("Device %s unmapping: %lx@%llx\n",
2082 pci_name(pdev), size, (unsigned long long)start_addr);
2083
2084 /* clear the whole page */
2085 dma_pte_clear_range(domain, start_addr, start_addr + size);
2086 /* free page tables */
2087 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2088 if (intel_iommu_strict) {
2089 if (iommu_flush_iotlb_psi(domain->iommu,
2090 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2091 iommu_flush_write_buffer(domain->iommu);
2092 /* free iova */
2093 __free_iova(&domain->iovad, iova);
2094 	} else {
2095 		/*
2096 		 * Queue the unmap and release the iova lazily; batching the
2097 		 * iotlb flush saves about 1/6th of the cpu it would cost here.
2098 		 */
2099 		add_unmap(domain, iova);
2100 	}
2101 }
2102
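/*
 * Allocate zeroed pages for a coherent DMA buffer (the GFP_DMA/GFP_DMA32
 * hints are cleared) and map them DMA_BIDIRECTIONAL through
 * __intel_map_single() against the device's coherent_dma_mask.  The pages
 * are freed again if the mapping fails.
 */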
2103 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2104 dma_addr_t *dma_handle, gfp_t flags)
2105 {
2106 void *vaddr;
2107 int order;
2108
2109 size = PAGE_ALIGN(size);
2110 order = get_order(size);
2111 flags &= ~(GFP_DMA | GFP_DMA32);
2112
2113 vaddr = (void *)__get_free_pages(flags, order);
2114 if (!vaddr)
2115 return NULL;
2116 memset(vaddr, 0, size);
2117
2118 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2119 DMA_BIDIRECTIONAL,
2120 hwdev->coherent_dma_mask);
2121 if (*dma_handle)
2122 return vaddr;
2123 free_pages((unsigned long)vaddr, order);
2124 return NULL;
2125 }
2126
2127 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2128 dma_addr_t dma_handle)
2129 {
2130 int order;
2131
2132 size = PAGE_ALIGN(size);
2133 order = get_order(size);
2134
2135 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2136 free_pages((unsigned long)vaddr, order);
2137 }
2138
2139 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2140
2141 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2142 int nelems, int dir)
2143 {
2144 int i;
2145 struct pci_dev *pdev = to_pci_dev(hwdev);
2146 struct dmar_domain *domain;
2147 unsigned long start_addr;
2148 struct iova *iova;
2149 size_t size = 0;
2150 void *addr;
2151 struct scatterlist *sg;
2152
2153 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2154 return;
2155
2156 domain = find_domain(pdev);
2157
2158 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2159 if (!iova)
2160 return;
2161 for_each_sg(sglist, sg, nelems, i) {
2162 addr = SG_ENT_VIRT_ADDRESS(sg);
2163 size += aligned_size((u64)addr, sg->length);
2164 }
2165
2166 start_addr = iova->pfn_lo << PAGE_SHIFT;
2167
2168 /* clear the whole page */
2169 dma_pte_clear_range(domain, start_addr, start_addr + size);
2170 /* free page tables */
2171 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2172
2173 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2174 size >> VTD_PAGE_SHIFT, 0))
2175 iommu_flush_write_buffer(domain->iommu);
2176
2177 /* free iova */
2178 __free_iova(&domain->iovad, iova);
2179 }
2180
2181 static int intel_nontranslate_map_sg(struct device *hwdev,
2182 struct scatterlist *sglist, int nelems, int dir)
2183 {
2184 int i;
2185 struct scatterlist *sg;
2186
2187 for_each_sg(sglist, sg, nelems, i) {
2188 BUG_ON(!sg_page(sg));
2189 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2190 sg->dma_length = sg->length;
2191 }
2192 return nelems;
2193 }
2194
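/*
 * Map a scatterlist: compute the total page-aligned size, allocate one
 * contiguous IOVA range covering every segment, then map each segment into
 * that range and fill in sg->dma_address/dma_length.  On failure the
 * partial mapping is torn down and 0 is returned.
 */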
2195 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2196 int dir)
2197 {
2198 void *addr;
2199 int i;
2200 struct pci_dev *pdev = to_pci_dev(hwdev);
2201 struct dmar_domain *domain;
2202 size_t size = 0;
2203 int prot = 0;
2204 size_t offset = 0;
2205 struct iova *iova = NULL;
2206 int ret;
2207 struct scatterlist *sg;
2208 unsigned long start_addr;
2209
2210 BUG_ON(dir == DMA_NONE);
2211 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2212 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2213
2214 domain = get_valid_domain_for_dev(pdev);
2215 if (!domain)
2216 return 0;
2217
2218 for_each_sg(sglist, sg, nelems, i) {
2219 addr = SG_ENT_VIRT_ADDRESS(sg);
2220 addr = (void *)virt_to_phys(addr);
2221 size += aligned_size((u64)addr, sg->length);
2222 }
2223
2224 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2225 if (!iova) {
2226 sglist->dma_length = 0;
2227 return 0;
2228 }
2229
2230 	/*
2231 	 * Check if the DMAR hardware supports zero-length reads on
2232 	 * write-only mappings.
2233 	 */
2234 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2235 			!cap_zlr(domain->iommu->cap))
2236 prot |= DMA_PTE_READ;
2237 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2238 prot |= DMA_PTE_WRITE;
2239
2240 start_addr = iova->pfn_lo << PAGE_SHIFT;
2241 offset = 0;
2242 for_each_sg(sglist, sg, nelems, i) {
2243 addr = SG_ENT_VIRT_ADDRESS(sg);
2244 addr = (void *)virt_to_phys(addr);
2245 size = aligned_size((u64)addr, sg->length);
2246 ret = domain_page_mapping(domain, start_addr + offset,
2247 ((u64)addr) & PAGE_MASK,
2248 size, prot);
2249 if (ret) {
2250 /* clear the page */
2251 dma_pte_clear_range(domain, start_addr,
2252 start_addr + offset);
2253 /* free page tables */
2254 dma_pte_free_pagetable(domain, start_addr,
2255 start_addr + offset);
2256 /* free iova */
2257 __free_iova(&domain->iovad, iova);
2258 return 0;
2259 }
2260 sg->dma_address = start_addr + offset +
2261 ((u64)addr & (~PAGE_MASK));
2262 sg->dma_length = sg->length;
2263 offset += size;
2264 }
2265
2266 /* it's a non-present to present mapping */
2267 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2268 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2269 iommu_flush_write_buffer(domain->iommu);
2270 return nelems;
2271 }
2272
2273 static struct dma_mapping_ops intel_dma_ops = {
2274 .alloc_coherent = intel_alloc_coherent,
2275 .free_coherent = intel_free_coherent,
2276 .map_single = intel_map_single,
2277 .unmap_single = intel_unmap_single,
2278 .map_sg = intel_map_sg,
2279 .unmap_sg = intel_unmap_sg,
2280 };
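
/*
 * Once intel_iommu_init() installs &intel_dma_ops as the global dma_ops,
 * drivers reach the entry points above through the generic DMA API.  An
 * illustrative sketch (not taken from this file):
 *
 *	dma_addr_t bus = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *	...
 *	dma_unmap_single(dev, bus, len, DMA_TO_DEVICE);
 */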
2281
2282 static inline int iommu_domain_cache_init(void)
2283 {
2284 int ret = 0;
2285
2286 iommu_domain_cache = kmem_cache_create("iommu_domain",
2287 sizeof(struct dmar_domain),
2288 0,
2289 SLAB_HWCACHE_ALIGN,
2291 NULL);
2292 if (!iommu_domain_cache) {
2293 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2294 ret = -ENOMEM;
2295 }
2296
2297 return ret;
2298 }
2299
2300 static inline int iommu_devinfo_cache_init(void)
2301 {
2302 int ret = 0;
2303
2304 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2305 sizeof(struct device_domain_info),
2306 0,
2307 SLAB_HWCACHE_ALIGN,
2308 NULL);
2309 if (!iommu_devinfo_cache) {
2310 printk(KERN_ERR "Couldn't create devinfo cache\n");
2311 ret = -ENOMEM;
2312 }
2313
2314 return ret;
2315 }
2316
2317 static inline int iommu_iova_cache_init(void)
2318 {
2319 int ret = 0;
2320
2321 iommu_iova_cache = kmem_cache_create("iommu_iova",
2322 sizeof(struct iova),
2323 0,
2324 SLAB_HWCACHE_ALIGN,
2325 NULL);
2326 if (!iommu_iova_cache) {
2327 printk(KERN_ERR "Couldn't create iova cache\n");
2328 ret = -ENOMEM;
2329 }
2330
2331 return ret;
2332 }
2333
2334 static int __init iommu_init_mempool(void)
2335 {
2336 int ret;
2337 ret = iommu_iova_cache_init();
2338 if (ret)
2339 return ret;
2340
2341 ret = iommu_domain_cache_init();
2342 if (ret)
2343 goto domain_error;
2344
2345 ret = iommu_devinfo_cache_init();
2346 if (!ret)
2347 return ret;
2348
2349 kmem_cache_destroy(iommu_domain_cache);
2350 domain_error:
2351 kmem_cache_destroy(iommu_iova_cache);
2352
2353 return -ENOMEM;
2354 }
2355
2356 static void __init iommu_exit_mempool(void)
2357 {
2358 kmem_cache_destroy(iommu_devinfo_cache);
2359 kmem_cache_destroy(iommu_domain_cache);
2360 kmem_cache_destroy(iommu_iova_cache);
2362 }
2363
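/*
 * Mark DRHD units that cover no PCI devices as ignored.  Unless dmar_map_gfx
 * is set, also bypass units that cover nothing but graphics devices, tagging
 * those devices with DUMMY_DEVICE_DOMAIN_INFO so the map/unmap paths pass
 * them through untranslated.
 */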
2364 static void __init init_no_remapping_devices(void)
2365 {
2366 struct dmar_drhd_unit *drhd;
2367
2368 for_each_drhd_unit(drhd) {
2369 if (!drhd->include_all) {
2370 int i;
2371 for (i = 0; i < drhd->devices_cnt; i++)
2372 if (drhd->devices[i] != NULL)
2373 break;
2374 /* ignore DMAR unit if no pci devices exist */
2375 if (i == drhd->devices_cnt)
2376 drhd->ignored = 1;
2377 }
2378 }
2379
2380 if (dmar_map_gfx)
2381 return;
2382
2383 for_each_drhd_unit(drhd) {
2384 int i;
2385 if (drhd->ignored || drhd->include_all)
2386 continue;
2387
2388 for (i = 0; i < drhd->devices_cnt; i++)
2389 if (drhd->devices[i] &&
2390 !IS_GFX_DEVICE(drhd->devices[i]))
2391 break;
2392
2393 if (i < drhd->devices_cnt)
2394 continue;
2395
2396 /* bypass IOMMU if it is just for gfx devices */
2397 drhd->ignored = 1;
2398 for (i = 0; i < drhd->devices_cnt; i++) {
2399 if (!drhd->devices[i])
2400 continue;
2401 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2402 }
2403 }
2404 }
2405
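/*
 * Boot-time entry point: parse the DMAR table and device scopes, bail out
 * if the IOMMU is disabled or swiotlb is in use, set up the mempools and
 * reserved IOVA ranges, initialize the DMAR units, and finally install
 * intel_dma_ops as the system's DMA operations.
 */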
2406 int __init intel_iommu_init(void)
2407 {
2408 int ret = 0;
2409
2410 if (dmar_table_init())
2411 return -ENODEV;
2412
2413 if (dmar_dev_scope_init())
2414 return -ENODEV;
2415
2416 	/*
2417 	 * Check the need for DMA-remapping initialization now.
2418 	 * The initialization above will also be used by interrupt remapping.
2419 	 */
2420 if (no_iommu || swiotlb || dmar_disabled)
2421 return -ENODEV;
2422
2423 iommu_init_mempool();
2424 dmar_init_reserved_ranges();
2425
2426 init_no_remapping_devices();
2427
2428 ret = init_dmars();
2429 if (ret) {
2430 printk(KERN_ERR "IOMMU: dmar init failed\n");
2431 put_iova_domain(&reserved_iova_list);
2432 iommu_exit_mempool();
2433 return ret;
2434 }
2435 printk(KERN_INFO
2436 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2437
2438 init_timer(&unmap_timer);
2439 force_iommu = 1;
2440 dma_ops = &intel_dma_ops;
2441 return 0;
2442 }
2443
2444 void intel_iommu_domain_exit(struct dmar_domain *domain)
2445 {
2446 u64 end;
2447
2448 	/* Domain 0 is reserved, so don't process it */
2449 if (!domain)
2450 return;
2451
2452 end = DOMAIN_MAX_ADDR(domain->gaw);
2453 end = end & (~VTD_PAGE_MASK);
2454
2455 /* clear ptes */
2456 dma_pte_clear_range(domain, 0, end);
2457
2458 /* free page tables */
2459 dma_pte_free_pagetable(domain, 0, end);
2460
2461 iommu_free_domain(domain);
2462 free_domain_mem(domain);
2463 }
2464 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2465
2466 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2467 {
2468 struct dmar_drhd_unit *drhd;
2469 struct dmar_domain *domain;
2470 struct intel_iommu *iommu;
2471
2472 drhd = dmar_find_matched_drhd_unit(pdev);
2473 if (!drhd) {
2474 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2475 return NULL;
2476 }
2477
2478 iommu = drhd->iommu;
2479 if (!iommu) {
2480 printk(KERN_ERR
2481 "intel_iommu_domain_alloc: iommu == NULL\n");
2482 return NULL;
2483 }
2484 domain = iommu_alloc_domain(iommu);
2485 if (!domain) {
2486 printk(KERN_ERR
2487 "intel_iommu_domain_alloc: domain == NULL\n");
2488 return NULL;
2489 }
2490 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2491 printk(KERN_ERR
2492 "intel_iommu_domain_alloc: domain_init() failed\n");
2493 intel_iommu_domain_exit(domain);
2494 return NULL;
2495 }
2496 return domain;
2497 }
2498 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2499
2500 int intel_iommu_context_mapping(
2501 struct dmar_domain *domain, struct pci_dev *pdev)
2502 {
2503 int rc;
2504 rc = domain_context_mapping(domain, pdev);
2505 return rc;
2506 }
2507 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2508
2509 int intel_iommu_page_mapping(
2510 struct dmar_domain *domain, dma_addr_t iova,
2511 u64 hpa, size_t size, int prot)
2512 {
2513 int rc;
2514 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2515 return rc;
2516 }
2517 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2518
2519 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2520 {
2521 detach_domain_for_dev(domain, bus, devfn);
2522 }
2523 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2524
2525 struct dmar_domain *
2526 intel_iommu_find_domain(struct pci_dev *pdev)
2527 {
2528 return find_domain(pdev);
2529 }
2530 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2531
2532 int intel_iommu_found(void)
2533 {
2534 return g_num_of_iommus;
2535 }
2536 EXPORT_SYMBOL_GPL(intel_iommu_found);
2537
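/*
 * Walk the domain's page table for @iova and return the host page frame
 * number it maps to, or 0 if no PTE is present.
 */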
2538 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2539 {
2540 struct dma_pte *pte;
2541 u64 pfn;
2542
2543 pfn = 0;
2544 pte = addr_to_dma_pte(domain, iova);
2545
2546 if (pte)
2547 pfn = dma_pte_addr(*pte);
2548
2549 return pfn >> VTD_PAGE_SHIFT;
2550 }
2551 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);