drivers/pci/intel-iommu.c
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE VTD_PAGE_SIZE
43 #define CONTEXT_SIZE VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START (0xfee00000)
49 #define IOAPIC_RANGE_END (0xfeefffff)
50 #define IOVA_START_ADDR (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
59
60 /*
61 * 0: Present
62 * 1-11: Reserved
63 * 12-63: Context Ptr (12 - (haw-1))
64 * 64-127: Reserved
65 */
66 struct root_entry {
67 u64 val;
68 u64 rsvd1;
69 };
70 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
71 static inline bool root_present(struct root_entry *root)
72 {
73 return (root->val & 1);
74 }
75 static inline void set_root_present(struct root_entry *root)
76 {
77 root->val |= 1;
78 }
79 static inline void set_root_value(struct root_entry *root, unsigned long value)
80 {
81 root->val |= value & VTD_PAGE_MASK;
82 }
83
84 static inline struct context_entry *
85 get_context_addr_from_root(struct root_entry *root)
86 {
87 return (struct context_entry *)
88 (root_present(root)?phys_to_virt(
89 root->val & VTD_PAGE_MASK) :
90 NULL);
91 }
92
93 /*
94 * low 64 bits:
95 * 0: present
96 * 1: fault processing disable
97 * 2-3: translation type
98 * 12-63: address space root
99 * high 64 bits:
100 * 0-2: address width
101 * 3-6: avail
102 * 8-23: domain id
103 */
104 struct context_entry {
105 u64 lo;
106 u64 hi;
107 };
108
109 static inline bool context_present(struct context_entry *context)
110 {
111 return (context->lo & 1);
112 }
113 static inline void context_set_present(struct context_entry *context)
114 {
115 context->lo |= 1;
116 }
117
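/*
 * Clear bit 1 (the "fault processing disable" bit) so faults on this
 * context entry are reported; the present bit (bit 0) and all higher
 * bits are preserved.
 */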
118 static inline void context_set_fault_enable(struct context_entry *context)
119 {
120 context->lo &= (((u64)-1) << 2) | 1;
121 }
122
123 #define CONTEXT_TT_MULTI_LEVEL 0
124
125 static inline void context_set_translation_type(struct context_entry *context,
126 unsigned long value)
127 {
128 context->lo &= (((u64)-1) << 4) | 3;
129 context->lo |= (value & 3) << 2;
130 }
131
132 static inline void context_set_address_root(struct context_entry *context,
133 unsigned long value)
134 {
135 context->lo |= value & VTD_PAGE_MASK;
136 }
137
138 static inline void context_set_address_width(struct context_entry *context,
139 unsigned long value)
140 {
141 context->hi |= value & 7;
142 }
143
144 static inline void context_set_domain_id(struct context_entry *context,
145 unsigned long value)
146 {
147 context->hi |= (value & ((1 << 16) - 1)) << 8;
148 }
149
150 static inline void context_clear_entry(struct context_entry *context)
151 {
152 context->lo = 0;
153 context->hi = 0;
154 }
155
156 /*
157 * 0: readable
158 * 1: writable
159 * 2-6: reserved
160 * 7: super page
161 * 8-11: available
162 * 12-63: Host physical address
163 */
164 struct dma_pte {
165 u64 val;
166 };
167
168 static inline void dma_clear_pte(struct dma_pte *pte)
169 {
170 pte->val = 0;
171 }
172
173 static inline void dma_set_pte_readable(struct dma_pte *pte)
174 {
175 pte->val |= DMA_PTE_READ;
176 }
177
178 static inline void dma_set_pte_writable(struct dma_pte *pte)
179 {
180 pte->val |= DMA_PTE_WRITE;
181 }
182
183 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
184 {
185 pte->val = (pte->val & ~3) | (prot & 3);
186 }
187
188 static inline u64 dma_pte_addr(struct dma_pte *pte)
189 {
190 return (pte->val & VTD_PAGE_MASK);
191 }
192
193 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
194 {
195 pte->val |= (addr & VTD_PAGE_MASK);
196 }
197
198 static inline bool dma_pte_present(struct dma_pte *pte)
199 {
200 return (pte->val & 3) != 0;
201 }
202
203 struct dmar_domain {
204 int id; /* domain id */
205 struct intel_iommu *iommu; /* back pointer to owning iommu */
206
207 struct list_head devices; /* all devices' list */
208 struct iova_domain iovad; /* iova's that belong to this domain */
209
210 struct dma_pte *pgd; /* virtual address */
211 spinlock_t mapping_lock; /* page table lock */
212 int gaw; /* max guest address width */
213
214 /* adjusted guest address width, 0 is level 2 30-bit */
215 int agaw;
216
217 #define DOMAIN_FLAG_MULTIPLE_DEVICES 1
218 int flags;
219 };
220
221 /* PCI domain-device relationship */
222 struct device_domain_info {
223 struct list_head link; /* link to domain siblings */
224 struct list_head global; /* link to global list */
225 u8 bus; /* PCI bus number */
226 u8 devfn; /* PCI devfn number */
227 struct pci_dev *dev; /* NULL for a PCIe-to-PCI bridge */
228 struct dmar_domain *domain; /* pointer to domain */
229 };
230
231 static void flush_unmaps_timeout(unsigned long data);
232
233 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
234
235 #define HIGH_WATER_MARK 250
236 struct deferred_flush_tables {
237 int next;
238 struct iova *iova[HIGH_WATER_MARK];
239 struct dmar_domain *domain[HIGH_WATER_MARK];
240 };
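/*
 * Unmapped IOVAs are queued here per IOMMU instead of being freed
 * immediately: flush_unmaps() releases them after a single global
 * IOTLB flush, triggered either by unmap_timer or by the list
 * reaching HIGH_WATER_MARK (see add_unmap() and flush_unmaps()).
 */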
241
242 static struct deferred_flush_tables *deferred_flush;
243
244 /* bitmap for indexing intel_iommus */
245 static int g_num_of_iommus;
246
247 static DEFINE_SPINLOCK(async_umap_flush_lock);
248 static LIST_HEAD(unmaps_to_do);
249
250 static int timer_on;
251 static long list_size;
252
253 static void domain_remove_dev_info(struct dmar_domain *domain);
254
255 int dmar_disabled;
256 static int __initdata dmar_map_gfx = 1;
257 static int dmar_forcedac;
258 static int intel_iommu_strict;
259
260 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
261 static DEFINE_SPINLOCK(device_domain_lock);
262 static LIST_HEAD(device_domain_list);
263
264 static int __init intel_iommu_setup(char *str)
265 {
266 if (!str)
267 return -EINVAL;
268 while (*str) {
269 if (!strncmp(str, "off", 3)) {
270 dmar_disabled = 1;
271 printk(KERN_INFO"Intel-IOMMU: disabled\n");
272 } else if (!strncmp(str, "igfx_off", 8)) {
273 dmar_map_gfx = 0;
274 printk(KERN_INFO
275 "Intel-IOMMU: disable GFX device mapping\n");
276 } else if (!strncmp(str, "forcedac", 8)) {
277 printk(KERN_INFO
278 "Intel-IOMMU: Forcing DAC for PCI devices\n");
279 dmar_forcedac = 1;
280 } else if (!strncmp(str, "strict", 6)) {
281 printk(KERN_INFO
282 "Intel-IOMMU: disable batched IOTLB flush\n");
283 intel_iommu_strict = 1;
284 }
285
286 str += strcspn(str, ",");
287 while (*str == ',')
288 str++;
289 }
290 return 0;
291 }
292 __setup("intel_iommu=", intel_iommu_setup);
293
294 static struct kmem_cache *iommu_domain_cache;
295 static struct kmem_cache *iommu_devinfo_cache;
296 static struct kmem_cache *iommu_iova_cache;
297
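/*
 * The allocators below temporarily set PF_MEMALLOC so the allocation
 * may dip into reserves, then restore the caller's original
 * PF_MEMALLOC bit: "flags" holds only that bit, so the final mask
 * clears PF_MEMALLOC only if the caller did not already have it set.
 */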
298 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
299 {
300 unsigned int flags;
301 void *vaddr;
302
303 /* trying to avoid low memory issues */
304 flags = current->flags & PF_MEMALLOC;
305 current->flags |= PF_MEMALLOC;
306 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
307 current->flags &= (~PF_MEMALLOC | flags);
308 return vaddr;
309 }
310
311
312 static inline void *alloc_pgtable_page(void)
313 {
314 unsigned int flags;
315 void *vaddr;
316
317 /* trying to avoid low memory issues */
318 flags = current->flags & PF_MEMALLOC;
319 current->flags |= PF_MEMALLOC;
320 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
321 current->flags &= (~PF_MEMALLOC | flags);
322 return vaddr;
323 }
324
325 static inline void free_pgtable_page(void *vaddr)
326 {
327 free_page((unsigned long)vaddr);
328 }
329
330 static inline void *alloc_domain_mem(void)
331 {
332 return iommu_kmem_cache_alloc(iommu_domain_cache);
333 }
334
335 static void free_domain_mem(void *vaddr)
336 {
337 kmem_cache_free(iommu_domain_cache, vaddr);
338 }
339
340 static inline void * alloc_devinfo_mem(void)
341 {
342 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
343 }
344
345 static inline void free_devinfo_mem(void *vaddr)
346 {
347 kmem_cache_free(iommu_devinfo_cache, vaddr);
348 }
349
350 struct iova *alloc_iova_mem(void)
351 {
352 return iommu_kmem_cache_alloc(iommu_iova_cache);
353 }
354
355 void free_iova_mem(struct iova *iova)
356 {
357 kmem_cache_free(iommu_iova_cache, iova);
358 }
359
360 /* Gets context entry for a given bus and devfn */
361 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
362 u8 bus, u8 devfn)
363 {
364 struct root_entry *root;
365 struct context_entry *context;
366 unsigned long phy_addr;
367 unsigned long flags;
368
369 spin_lock_irqsave(&iommu->lock, flags);
370 root = &iommu->root_entry[bus];
371 context = get_context_addr_from_root(root);
372 if (!context) {
373 context = (struct context_entry *)alloc_pgtable_page();
374 if (!context) {
375 spin_unlock_irqrestore(&iommu->lock, flags);
376 return NULL;
377 }
378 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
379 phy_addr = virt_to_phys((void *)context);
380 set_root_value(root, phy_addr);
381 set_root_present(root);
382 __iommu_flush_cache(iommu, root, sizeof(*root));
383 }
384 spin_unlock_irqrestore(&iommu->lock, flags);
385 return &context[devfn];
386 }
387
388 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
389 {
390 struct root_entry *root;
391 struct context_entry *context;
392 int ret;
393 unsigned long flags;
394
395 spin_lock_irqsave(&iommu->lock, flags);
396 root = &iommu->root_entry[bus];
397 context = get_context_addr_from_root(root);
398 if (!context) {
399 ret = 0;
400 goto out;
401 }
402 ret = context_present(&context[devfn]);
403 out:
404 spin_unlock_irqrestore(&iommu->lock, flags);
405 return ret;
406 }
407
408 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
409 {
410 struct root_entry *root;
411 struct context_entry *context;
412 unsigned long flags;
413
414 spin_lock_irqsave(&iommu->lock, flags);
415 root = &iommu->root_entry[bus];
416 context = get_context_addr_from_root(root);
417 if (context) {
418 context_clear_entry(&context[devfn]);
419 __iommu_flush_cache(iommu, &context[devfn],
420 sizeof(*context));
421 }
422 spin_unlock_irqrestore(&iommu->lock, flags);
423 }
424
425 static void free_context_table(struct intel_iommu *iommu)
426 {
427 struct root_entry *root;
428 int i;
429 unsigned long flags;
430 struct context_entry *context;
431
432 spin_lock_irqsave(&iommu->lock, flags);
433 if (!iommu->root_entry) {
434 goto out;
435 }
436 for (i = 0; i < ROOT_ENTRY_NR; i++) {
437 root = &iommu->root_entry[i];
438 context = get_context_addr_from_root(root);
439 if (context)
440 free_pgtable_page(context);
441 }
442 free_pgtable_page(iommu->root_entry);
443 iommu->root_entry = NULL;
444 out:
445 spin_unlock_irqrestore(&iommu->lock, flags);
446 }
447
448 /* page table handling */
449 #define LEVEL_STRIDE (9)
450 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
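/*
 * Each page-table level decodes LEVEL_STRIDE (9) address bits, i.e.
 * 512 entries per 4KB table.  An adjusted guest address width (agaw)
 * of 0 means a 2-level, 30-bit page table; each additional agaw step
 * adds one level and 9 bits of address width (see agaw_to_level()
 * and agaw_to_width() below).
 */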
451
452 static inline int agaw_to_level(int agaw)
453 {
454 return agaw + 2;
455 }
456
457 static inline int agaw_to_width(int agaw)
458 {
459 return 30 + agaw * LEVEL_STRIDE;
460
461 }
462
463 static inline int width_to_agaw(int width)
464 {
465 return (width - 30) / LEVEL_STRIDE;
466 }
467
468 static inline unsigned int level_to_offset_bits(int level)
469 {
470 return (12 + (level - 1) * LEVEL_STRIDE);
471 }
472
473 static inline int address_level_offset(u64 addr, int level)
474 {
475 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
476 }
477
478 static inline u64 level_mask(int level)
479 {
480 return ((u64)-1 << level_to_offset_bits(level));
481 }
482
483 static inline u64 level_size(int level)
484 {
485 return ((u64)1 << level_to_offset_bits(level));
486 }
487
488 static inline u64 align_to_level(u64 addr, int level)
489 {
490 return ((addr + level_size(level) - 1) & level_mask(level));
491 }
492
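/*
 * Walk the page-table hierarchy down to the last level for addr,
 * allocating intermediate tables along the way as needed, and return
 * a pointer to the leaf PTE (or NULL if a table allocation fails).
 */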
493 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
494 {
495 int addr_width = agaw_to_width(domain->agaw);
496 struct dma_pte *parent, *pte = NULL;
497 int level = agaw_to_level(domain->agaw);
498 int offset;
499 unsigned long flags;
500
501 BUG_ON(!domain->pgd);
502
503 addr &= (((u64)1) << addr_width) - 1;
504 parent = domain->pgd;
505
506 spin_lock_irqsave(&domain->mapping_lock, flags);
507 while (level > 0) {
508 void *tmp_page;
509
510 offset = address_level_offset(addr, level);
511 pte = &parent[offset];
512 if (level == 1)
513 break;
514
515 if (!dma_pte_present(pte)) {
516 tmp_page = alloc_pgtable_page();
517
518 if (!tmp_page) {
519 spin_unlock_irqrestore(&domain->mapping_lock,
520 flags);
521 return NULL;
522 }
523 __iommu_flush_cache(domain->iommu, tmp_page,
524 PAGE_SIZE);
525 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
526 /*
527 * high level table always sets r/w, last level page
528 * table control read/write
529 */
530 dma_set_pte_readable(pte);
531 dma_set_pte_writable(pte);
532 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
533 }
534 parent = phys_to_virt(dma_pte_addr(pte));
535 level--;
536 }
537
538 spin_unlock_irqrestore(&domain->mapping_lock, flags);
539 return pte;
540 }
541
542 /* return address's pte at specific level */
543 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
544 int level)
545 {
546 struct dma_pte *parent, *pte = NULL;
547 int total = agaw_to_level(domain->agaw);
548 int offset;
549
550 parent = domain->pgd;
551 while (level <= total) {
552 offset = address_level_offset(addr, total);
553 pte = &parent[offset];
554 if (level == total)
555 return pte;
556
557 if (!dma_pte_present(pte))
558 break;
559 parent = phys_to_virt(dma_pte_addr(pte));
560 total--;
561 }
562 return NULL;
563 }
564
565 /* clear one page's page table */
566 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
567 {
568 struct dma_pte *pte = NULL;
569
570 /* get last level pte */
571 pte = dma_addr_level_pte(domain, addr, 1);
572
573 if (pte) {
574 dma_clear_pte(pte);
575 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
576 }
577 }
578
579 /* clear last level pte, a tlb flush should be followed */
580 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
581 {
582 int addr_width = agaw_to_width(domain->agaw);
583
584 start &= (((u64)1) << addr_width) - 1;
585 end &= (((u64)1) << addr_width) - 1;
586 /* in case it's a partial page */
587 start = PAGE_ALIGN(start);
588 end &= PAGE_MASK;
589
590 /* we don't need lock here, nobody else touches the iova range */
591 while (start < end) {
592 dma_pte_clear_one(domain, start);
593 start += VTD_PAGE_SIZE;
594 }
595 }
596
597 /* free page table pages. last level pte should already be cleared */
598 static void dma_pte_free_pagetable(struct dmar_domain *domain,
599 u64 start, u64 end)
600 {
601 int addr_width = agaw_to_width(domain->agaw);
602 struct dma_pte *pte;
603 int total = agaw_to_level(domain->agaw);
604 int level;
605 u64 tmp;
606
607 start &= (((u64)1) << addr_width) - 1;
608 end &= (((u64)1) << addr_width) - 1;
609
610 /* we don't need lock here, nobody else touches the iova range */
611 level = 2;
612 while (level <= total) {
613 tmp = align_to_level(start, level);
614 if (tmp >= end || (tmp + level_size(level) > end))
615 return;
616
617 while (tmp < end) {
618 pte = dma_addr_level_pte(domain, tmp, level);
619 if (pte) {
620 free_pgtable_page(
621 phys_to_virt(dma_pte_addr(pte)));
622 dma_clear_pte(pte);
623 __iommu_flush_cache(domain->iommu,
624 pte, sizeof(*pte));
625 }
626 tmp += level_size(level);
627 }
628 level++;
629 }
630 /* free pgd */
631 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
632 free_pgtable_page(domain->pgd);
633 domain->pgd = NULL;
634 }
635 }
636
637 /* iommu handling */
638 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
639 {
640 struct root_entry *root;
641 unsigned long flags;
642
643 root = (struct root_entry *)alloc_pgtable_page();
644 if (!root)
645 return -ENOMEM;
646
647 __iommu_flush_cache(iommu, root, ROOT_SIZE);
648
649 spin_lock_irqsave(&iommu->lock, flags);
650 iommu->root_entry = root;
651 spin_unlock_irqrestore(&iommu->lock, flags);
652
653 return 0;
654 }
655
656 static void iommu_set_root_entry(struct intel_iommu *iommu)
657 {
658 void *addr;
659 u32 cmd, sts;
660 unsigned long flag;
661
662 addr = iommu->root_entry;
663
664 spin_lock_irqsave(&iommu->register_lock, flag);
665 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
666
667 cmd = iommu->gcmd | DMA_GCMD_SRTP;
668 writel(cmd, iommu->reg + DMAR_GCMD_REG);
669
670 /* Make sure the hardware completes it */
671 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
672 readl, (sts & DMA_GSTS_RTPS), sts);
673
674 spin_unlock_irqrestore(&iommu->register_lock, flag);
675 }
676
677 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
678 {
679 u32 val;
680 unsigned long flag;
681
682 if (!cap_rwbf(iommu->cap))
683 return;
684 val = iommu->gcmd | DMA_GCMD_WBF;
685
686 spin_lock_irqsave(&iommu->register_lock, flag);
687 writel(val, iommu->reg + DMAR_GCMD_REG);
688
689 /* Make sure the hardware completes it */
690 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
691 readl, (!(val & DMA_GSTS_WBFS)), val);
692
693 spin_unlock_irqrestore(&iommu->register_lock, flag);
694 }
695
696 /* return value determines whether we need a write buffer flush */
697 static int __iommu_flush_context(struct intel_iommu *iommu,
698 u16 did, u16 source_id, u8 function_mask, u64 type,
699 int non_present_entry_flush)
700 {
701 u64 val = 0;
702 unsigned long flag;
703
704 /*
705 * In the non-present entry flush case: if the hardware doesn't cache
706 * non-present entries, we do nothing; if it does cache them, we flush
707 * the entries of domain 0 (the domain id used to cache any non-present
708 * entries).
709 */
710 if (non_present_entry_flush) {
711 if (!cap_caching_mode(iommu->cap))
712 return 1;
713 else
714 did = 0;
715 }
716
717 switch (type) {
718 case DMA_CCMD_GLOBAL_INVL:
719 val = DMA_CCMD_GLOBAL_INVL;
720 break;
721 case DMA_CCMD_DOMAIN_INVL:
722 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
723 break;
724 case DMA_CCMD_DEVICE_INVL:
725 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
726 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
727 break;
728 default:
729 BUG();
730 }
731 val |= DMA_CCMD_ICC;
732
733 spin_lock_irqsave(&iommu->register_lock, flag);
734 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
735
736 /* Make sure the hardware completes it */
737 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
738 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
739
740 spin_unlock_irqrestore(&iommu->register_lock, flag);
741
742 /* flush context entry will implicitly flush write buffer */
743 return 0;
744 }
745
746 /* return value determines whether we need a write buffer flush */
747 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
748 u64 addr, unsigned int size_order, u64 type,
749 int non_present_entry_flush)
750 {
751 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
752 u64 val = 0, val_iva = 0;
753 unsigned long flag;
754
755 /*
756 * In the non-present entry flush case: if the hardware doesn't cache
757 * non-present entries, we do nothing; if it does cache them, we flush
758 * the entries of domain 0 (the domain id used to cache any non-present
759 * entries).
760 */
761 if (non_present_entry_flush) {
762 if (!cap_caching_mode(iommu->cap))
763 return 1;
764 else
765 did = 0;
766 }
767
768 switch (type) {
769 case DMA_TLB_GLOBAL_FLUSH:
770 /* global flush doesn't need set IVA_REG */
771 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
772 break;
773 case DMA_TLB_DSI_FLUSH:
774 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
775 break;
776 case DMA_TLB_PSI_FLUSH:
777 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
778 /* Note: always flush non-leaf currently */
779 val_iva = size_order | addr;
780 break;
781 default:
782 BUG();
783 }
784 /* Note: set drain read/write */
785 #if 0
786 /*
787 * This is probably just to be extra safe; it looks like we can
788 * ignore it without any impact.
789 */
790 if (cap_read_drain(iommu->cap))
791 val |= DMA_TLB_READ_DRAIN;
792 #endif
793 if (cap_write_drain(iommu->cap))
794 val |= DMA_TLB_WRITE_DRAIN;
795
796 spin_lock_irqsave(&iommu->register_lock, flag);
797 /* Note: Only uses first TLB reg currently */
798 if (val_iva)
799 dmar_writeq(iommu->reg + tlb_offset, val_iva);
800 dmar_writeq(iommu->reg + tlb_offset + 8, val);
801
802 /* Make sure the hardware completes it */
803 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
804 dmar_readq, (!(val & DMA_TLB_IVT)), val);
805
806 spin_unlock_irqrestore(&iommu->register_lock, flag);
807
808 /* check IOTLB invalidation granularity */
809 if (DMA_TLB_IAIG(val) == 0)
810 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
811 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
812 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
813 (unsigned long long)DMA_TLB_IIRG(type),
814 (unsigned long long)DMA_TLB_IAIG(val));
815 /* flush iotlb entry will implicitly flush write buffer */
816 return 0;
817 }
818
819 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
820 u64 addr, unsigned int pages, int non_present_entry_flush)
821 {
822 unsigned int mask;
823
824 BUG_ON(addr & (~VTD_PAGE_MASK));
825 BUG_ON(pages == 0);
826
827 /* Fallback to domain selective flush if no PSI support */
828 if (!cap_pgsel_inv(iommu->cap))
829 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
830 DMA_TLB_DSI_FLUSH,
831 non_present_entry_flush);
832
833 /*
834 * PSI requires the page size to be a power of two, and the base address
835 * to be naturally aligned to that size
836 */
837 mask = ilog2(__roundup_pow_of_two(pages));
838 /* Fallback to domain selective flush if size is too big */
839 if (mask > cap_max_amask_val(iommu->cap))
840 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
841 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
842
843 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
844 DMA_TLB_PSI_FLUSH,
845 non_present_entry_flush);
846 }
847
848 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
849 {
850 u32 pmen;
851 unsigned long flags;
852
853 spin_lock_irqsave(&iommu->register_lock, flags);
854 pmen = readl(iommu->reg + DMAR_PMEN_REG);
855 pmen &= ~DMA_PMEN_EPM;
856 writel(pmen, iommu->reg + DMAR_PMEN_REG);
857
858 /* wait for the protected region status bit to clear */
859 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
860 readl, !(pmen & DMA_PMEN_PRS), pmen);
861
862 spin_unlock_irqrestore(&iommu->register_lock, flags);
863 }
864
865 static int iommu_enable_translation(struct intel_iommu *iommu)
866 {
867 u32 sts;
868 unsigned long flags;
869
870 spin_lock_irqsave(&iommu->register_lock, flags);
871 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
872
873 /* Make sure the hardware completes it */
874 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
875 readl, (sts & DMA_GSTS_TES), sts);
876
877 iommu->gcmd |= DMA_GCMD_TE;
878 spin_unlock_irqrestore(&iommu->register_lock, flags);
879 return 0;
880 }
881
882 static int iommu_disable_translation(struct intel_iommu *iommu)
883 {
884 u32 sts;
885 unsigned long flag;
886
887 spin_lock_irqsave(&iommu->register_lock, flag);
888 iommu->gcmd &= ~DMA_GCMD_TE;
889 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
890
891 /* Make sure the hardware completes it */
892 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
893 readl, (!(sts & DMA_GSTS_TES)), sts);
894
895 spin_unlock_irqrestore(&iommu->register_lock, flag);
896 return 0;
897 }
898
899 /* iommu interrupt handling. Most of it is MSI-like. */
900
901 static const char *fault_reason_strings[] =
902 {
903 "Software",
904 "Present bit in root entry is clear",
905 "Present bit in context entry is clear",
906 "Invalid context entry",
907 "Access beyond MGAW",
908 "PTE Write access is not set",
909 "PTE Read access is not set",
910 "Next page table ptr is invalid",
911 "Root table address invalid",
912 "Context table ptr is invalid",
913 "non-zero reserved fields in RTP",
914 "non-zero reserved fields in CTP",
915 "non-zero reserved fields in PTE",
916 };
917 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
918
919 const char *dmar_get_fault_reason(u8 fault_reason)
920 {
921 if (fault_reason > MAX_FAULT_REASON_IDX)
922 return "Unknown";
923 else
924 return fault_reason_strings[fault_reason];
925 }
926
927 void dmar_msi_unmask(unsigned int irq)
928 {
929 struct intel_iommu *iommu = get_irq_data(irq);
930 unsigned long flag;
931
932 /* unmask it */
933 spin_lock_irqsave(&iommu->register_lock, flag);
934 writel(0, iommu->reg + DMAR_FECTL_REG);
935 /* Read back the register to flush the posted write */
936 readl(iommu->reg + DMAR_FECTL_REG);
937 spin_unlock_irqrestore(&iommu->register_lock, flag);
938 }
939
940 void dmar_msi_mask(unsigned int irq)
941 {
942 unsigned long flag;
943 struct intel_iommu *iommu = get_irq_data(irq);
944
945 /* mask it */
946 spin_lock_irqsave(&iommu->register_lock, flag);
947 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
948 /* Read back the register to flush the posted write */
949 readl(iommu->reg + DMAR_FECTL_REG);
950 spin_unlock_irqrestore(&iommu->register_lock, flag);
951 }
952
953 void dmar_msi_write(int irq, struct msi_msg *msg)
954 {
955 struct intel_iommu *iommu = get_irq_data(irq);
956 unsigned long flag;
957
958 spin_lock_irqsave(&iommu->register_lock, flag);
959 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
960 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
961 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
962 spin_unlock_irqrestore(&iommu->register_lock, flag);
963 }
964
965 void dmar_msi_read(int irq, struct msi_msg *msg)
966 {
967 struct intel_iommu *iommu = get_irq_data(irq);
968 unsigned long flag;
969
970 spin_lock_irqsave(&iommu->register_lock, flag);
971 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
972 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
973 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
974 spin_unlock_irqrestore(&iommu->register_lock, flag);
975 }
976
977 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
978 u8 fault_reason, u16 source_id, unsigned long long addr)
979 {
980 const char *reason;
981
982 reason = dmar_get_fault_reason(fault_reason);
983
984 printk(KERN_ERR
985 "DMAR:[%s] Request device [%02x:%02x.%d] "
986 "fault addr %llx \n"
987 "DMAR:[fault reason %02d] %s\n",
988 (type ? "DMA Read" : "DMA Write"),
989 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
990 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
991 return 0;
992 }
993
994 #define PRIMARY_FAULT_REG_LEN (16)
995 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
996 {
997 struct intel_iommu *iommu = dev_id;
998 int reg, fault_index;
999 u32 fault_status;
1000 unsigned long flag;
1001
1002 spin_lock_irqsave(&iommu->register_lock, flag);
1003 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1004
1005 /* TBD: ignore advanced fault log currently */
1006 if (!(fault_status & DMA_FSTS_PPF))
1007 goto clear_overflow;
1008
1009 fault_index = dma_fsts_fault_record_index(fault_status);
1010 reg = cap_fault_reg_offset(iommu->cap);
1011 while (1) {
1012 u8 fault_reason;
1013 u16 source_id;
1014 u64 guest_addr;
1015 int type;
1016 u32 data;
1017
1018 /* highest 32 bits */
1019 data = readl(iommu->reg + reg +
1020 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1021 if (!(data & DMA_FRCD_F))
1022 break;
1023
1024 fault_reason = dma_frcd_fault_reason(data);
1025 type = dma_frcd_type(data);
1026
1027 data = readl(iommu->reg + reg +
1028 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1029 source_id = dma_frcd_source_id(data);
1030
1031 guest_addr = dmar_readq(iommu->reg + reg +
1032 fault_index * PRIMARY_FAULT_REG_LEN);
1033 guest_addr = dma_frcd_page_addr(guest_addr);
1034 /* clear the fault */
1035 writel(DMA_FRCD_F, iommu->reg + reg +
1036 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1037
1038 spin_unlock_irqrestore(&iommu->register_lock, flag);
1039
1040 iommu_page_fault_do_one(iommu, type, fault_reason,
1041 source_id, guest_addr);
1042
1043 fault_index++;
1044 if (fault_index > cap_num_fault_regs(iommu->cap))
1045 fault_index = 0;
1046 spin_lock_irqsave(&iommu->register_lock, flag);
1047 }
1048 clear_overflow:
1049 /* clear primary fault overflow */
1050 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1051 if (fault_status & DMA_FSTS_PFO)
1052 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1053
1054 spin_unlock_irqrestore(&iommu->register_lock, flag);
1055 return IRQ_HANDLED;
1056 }
1057
1058 int dmar_set_interrupt(struct intel_iommu *iommu)
1059 {
1060 int irq, ret;
1061
1062 irq = create_irq();
1063 if (!irq) {
1064 printk(KERN_ERR "IOMMU: no free vectors\n");
1065 return -EINVAL;
1066 }
1067
1068 set_irq_data(irq, iommu);
1069 iommu->irq = irq;
1070
1071 ret = arch_setup_dmar_msi(irq);
1072 if (ret) {
1073 set_irq_data(irq, NULL);
1074 iommu->irq = 0;
1075 destroy_irq(irq);
1076 return ret;
1077 }
1078
1079 /* Process and clear any already-pending faults before requesting the irq */
1080 iommu_page_fault(irq, iommu);
1081
1082 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1083 if (ret)
1084 printk(KERN_ERR "IOMMU: can't request irq\n");
1085 return ret;
1086 }
1087
1088 static int iommu_init_domains(struct intel_iommu *iommu)
1089 {
1090 unsigned long ndomains;
1091 unsigned long nlongs;
1092
1093 ndomains = cap_ndoms(iommu->cap);
1094 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
1095 nlongs = BITS_TO_LONGS(ndomains);
1096
1097 /* TBD: there might be 64K domains,
1098 * consider another allocation scheme for future chips
1099 */
1100 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1101 if (!iommu->domain_ids) {
1102 printk(KERN_ERR "Allocating domain id array failed\n");
1103 return -ENOMEM;
1104 }
1105 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1106 GFP_KERNEL);
1107 if (!iommu->domains) {
1108 printk(KERN_ERR "Allocating domain array failed\n");
1109 kfree(iommu->domain_ids);
1110 return -ENOMEM;
1111 }
1112
1113 spin_lock_init(&iommu->lock);
1114
1115 /*
1116 * if Caching mode is set, then invalid translations are tagged
1117 * with domain id 0. Hence we need to pre-allocate it.
1118 */
1119 if (cap_caching_mode(iommu->cap))
1120 set_bit(0, iommu->domain_ids);
1121 return 0;
1122 }
1123
1124
1125 static void domain_exit(struct dmar_domain *domain);
1126
1127 void free_dmar_iommu(struct intel_iommu *iommu)
1128 {
1129 struct dmar_domain *domain;
1130 int i;
1131
1132 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1133 for (; i < cap_ndoms(iommu->cap); ) {
1134 domain = iommu->domains[i];
1135 clear_bit(i, iommu->domain_ids);
1136 domain_exit(domain);
1137 i = find_next_bit(iommu->domain_ids,
1138 cap_ndoms(iommu->cap), i+1);
1139 }
1140
1141 if (iommu->gcmd & DMA_GCMD_TE)
1142 iommu_disable_translation(iommu);
1143
1144 if (iommu->irq) {
1145 set_irq_data(iommu->irq, NULL);
1146 /* This will mask the irq */
1147 free_irq(iommu->irq, iommu);
1148 destroy_irq(iommu->irq);
1149 }
1150
1151 kfree(iommu->domains);
1152 kfree(iommu->domain_ids);
1153
1154 /* free context mapping */
1155 free_context_table(iommu);
1156 }
1157
1158 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1159 {
1160 unsigned long num;
1161 unsigned long ndomains;
1162 struct dmar_domain *domain;
1163 unsigned long flags;
1164
1165 domain = alloc_domain_mem();
1166 if (!domain)
1167 return NULL;
1168
1169 ndomains = cap_ndoms(iommu->cap);
1170
1171 spin_lock_irqsave(&iommu->lock, flags);
1172 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1173 if (num >= ndomains) {
1174 spin_unlock_irqrestore(&iommu->lock, flags);
1175 free_domain_mem(domain);
1176 printk(KERN_ERR "IOMMU: no free domain ids\n");
1177 return NULL;
1178 }
1179
1180 set_bit(num, iommu->domain_ids);
1181 domain->id = num;
1182 domain->iommu = iommu;
1183 domain->flags = 0;
1184 iommu->domains[num] = domain;
1185 spin_unlock_irqrestore(&iommu->lock, flags);
1186
1187 return domain;
1188 }
1189
1190 static void iommu_free_domain(struct dmar_domain *domain)
1191 {
1192 unsigned long flags;
1193
1194 spin_lock_irqsave(&domain->iommu->lock, flags);
1195 clear_bit(domain->id, domain->iommu->domain_ids);
1196 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1197 }
1198
1199 static struct iova_domain reserved_iova_list;
1200 static struct lock_class_key reserved_alloc_key;
1201 static struct lock_class_key reserved_rbtree_key;
1202
1203 static void dmar_init_reserved_ranges(void)
1204 {
1205 struct pci_dev *pdev = NULL;
1206 struct iova *iova;
1207 int i;
1208 u64 addr, size;
1209
1210 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1211
1212 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1213 &reserved_alloc_key);
1214 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1215 &reserved_rbtree_key);
1216
1217 /* IOAPIC ranges shouldn't be accessed by DMA */
1218 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1219 IOVA_PFN(IOAPIC_RANGE_END));
1220 if (!iova)
1221 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1222
1223 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1224 for_each_pci_dev(pdev) {
1225 struct resource *r;
1226
1227 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1228 r = &pdev->resource[i];
1229 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1230 continue;
1231 addr = r->start;
1232 addr &= PAGE_MASK;
1233 size = r->end - addr;
1234 size = PAGE_ALIGN(size);
1235 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1236 IOVA_PFN(size + addr) - 1);
1237 if (!iova)
1238 printk(KERN_ERR "Reserve iova failed\n");
1239 }
1240 }
1241
1242 }
1243
1244 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1245 {
1246 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1247 }
1248
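/*
 * Round the guest address width up so that the width above the 12-bit
 * page offset is a multiple of the 9-bit level stride, capping the
 * result at 64 bits.
 */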
1249 static inline int guestwidth_to_adjustwidth(int gaw)
1250 {
1251 int agaw;
1252 int r = (gaw - 12) % 9;
1253
1254 if (r == 0)
1255 agaw = gaw;
1256 else
1257 agaw = gaw + 9 - r;
1258 if (agaw > 64)
1259 agaw = 64;
1260 return agaw;
1261 }
1262
1263 static int domain_init(struct dmar_domain *domain, int guest_width)
1264 {
1265 struct intel_iommu *iommu;
1266 int adjust_width, agaw;
1267 unsigned long sagaw;
1268
1269 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1270 spin_lock_init(&domain->mapping_lock);
1271
1272 domain_reserve_special_ranges(domain);
1273
1274 /* calculate AGAW */
1275 iommu = domain->iommu;
1276 if (guest_width > cap_mgaw(iommu->cap))
1277 guest_width = cap_mgaw(iommu->cap);
1278 domain->gaw = guest_width;
1279 adjust_width = guestwidth_to_adjustwidth(guest_width);
1280 agaw = width_to_agaw(adjust_width);
1281 sagaw = cap_sagaw(iommu->cap);
1282 if (!test_bit(agaw, &sagaw)) {
1283 /* hardware doesn't support it, choose a bigger one */
1284 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1285 agaw = find_next_bit(&sagaw, 5, agaw);
1286 if (agaw >= 5)
1287 return -ENODEV;
1288 }
1289 domain->agaw = agaw;
1290 INIT_LIST_HEAD(&domain->devices);
1291
1292 /* always allocate the top pgd */
1293 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1294 if (!domain->pgd)
1295 return -ENOMEM;
1296 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1297 return 0;
1298 }
1299
1300 static void domain_exit(struct dmar_domain *domain)
1301 {
1302 u64 end;
1303
1304 /* Domain 0 is reserved, so don't process it */
1305 if (!domain)
1306 return;
1307
1308 domain_remove_dev_info(domain);
1309 /* destroy iovas */
1310 put_iova_domain(&domain->iovad);
1311 end = DOMAIN_MAX_ADDR(domain->gaw);
1312 end = end & (~PAGE_MASK);
1313
1314 /* clear ptes */
1315 dma_pte_clear_range(domain, 0, end);
1316
1317 /* free page tables */
1318 dma_pte_free_pagetable(domain, 0, end);
1319
1320 iommu_free_domain(domain);
1321 free_domain_mem(domain);
1322 }
1323
1324 static int domain_context_mapping_one(struct dmar_domain *domain,
1325 u8 bus, u8 devfn)
1326 {
1327 struct context_entry *context;
1328 struct intel_iommu *iommu = domain->iommu;
1329 unsigned long flags;
1330
1331 pr_debug("Set context mapping for %02x:%02x.%d\n",
1332 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1333 BUG_ON(!domain->pgd);
1334 context = device_to_context_entry(iommu, bus, devfn);
1335 if (!context)
1336 return -ENOMEM;
1337 spin_lock_irqsave(&iommu->lock, flags);
1338 if (context_present(context)) {
1339 spin_unlock_irqrestore(&iommu->lock, flags);
1340 return 0;
1341 }
1342
1343 context_set_domain_id(context, domain->id);
1344 context_set_address_width(context, domain->agaw);
1345 context_set_address_root(context, virt_to_phys(domain->pgd));
1346 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1347 context_set_fault_enable(context);
1348 context_set_present(context);
1349 __iommu_flush_cache(iommu, context, sizeof(*context));
1350
1351 /* it's a non-present to present mapping */
1352 if (iommu->flush.flush_context(iommu, domain->id,
1353 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1354 DMA_CCMD_DEVICE_INVL, 1))
1355 iommu_flush_write_buffer(iommu);
1356 else
1357 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1358
1359 spin_unlock_irqrestore(&iommu->lock, flags);
1360 return 0;
1361 }
1362
1363 static int
1364 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1365 {
1366 int ret;
1367 struct pci_dev *tmp, *parent;
1368
1369 ret = domain_context_mapping_one(domain, pdev->bus->number,
1370 pdev->devfn);
1371 if (ret)
1372 return ret;
1373
1374 /* dependent device mapping */
1375 tmp = pci_find_upstream_pcie_bridge(pdev);
1376 if (!tmp)
1377 return 0;
1378 /* Secondary interface's bus number and devfn 0 */
1379 parent = pdev->bus->self;
1380 while (parent != tmp) {
1381 ret = domain_context_mapping_one(domain, parent->bus->number,
1382 parent->devfn);
1383 if (ret)
1384 return ret;
1385 parent = parent->bus->self;
1386 }
1387 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1388 return domain_context_mapping_one(domain,
1389 tmp->subordinate->number, 0);
1390 else /* this is a legacy PCI bridge */
1391 return domain_context_mapping_one(domain,
1392 tmp->bus->number, tmp->devfn);
1393 }
1394
1395 static int domain_context_mapped(struct dmar_domain *domain,
1396 struct pci_dev *pdev)
1397 {
1398 int ret;
1399 struct pci_dev *tmp, *parent;
1400
1401 ret = device_context_mapped(domain->iommu,
1402 pdev->bus->number, pdev->devfn);
1403 if (!ret)
1404 return ret;
1405 /* dependent device mapping */
1406 tmp = pci_find_upstream_pcie_bridge(pdev);
1407 if (!tmp)
1408 return ret;
1409 /* Secondary interface's bus number and devfn 0 */
1410 parent = pdev->bus->self;
1411 while (parent != tmp) {
1412 ret = device_context_mapped(domain->iommu, parent->bus->number,
1413 parent->devfn);
1414 if (!ret)
1415 return ret;
1416 parent = parent->bus->self;
1417 }
1418 if (tmp->is_pcie)
1419 return device_context_mapped(domain->iommu,
1420 tmp->subordinate->number, 0);
1421 else
1422 return device_context_mapped(domain->iommu,
1423 tmp->bus->number, tmp->devfn);
1424 }
1425
1426 static int
1427 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1428 u64 hpa, size_t size, int prot)
1429 {
1430 u64 start_pfn, end_pfn;
1431 struct dma_pte *pte;
1432 int index;
1433 int addr_width = agaw_to_width(domain->agaw);
1434
1435 hpa &= (((u64)1) << addr_width) - 1;
1436
1437 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1438 return -EINVAL;
1439 iova &= PAGE_MASK;
1440 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1441 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1442 index = 0;
1443 while (start_pfn < end_pfn) {
1444 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1445 if (!pte)
1446 return -ENOMEM;
1447 /* We don't need lock here, nobody else
1448 * touches the iova range
1449 */
1450 BUG_ON(dma_pte_addr(pte));
1451 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1452 dma_set_pte_prot(pte, prot);
1453 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1454 start_pfn++;
1455 index++;
1456 }
1457 return 0;
1458 }
1459
1460 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1461 {
1462 clear_context_table(domain->iommu, bus, devfn);
1463 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1464 DMA_CCMD_GLOBAL_INVL, 0);
1465 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1466 DMA_TLB_GLOBAL_FLUSH, 0);
1467 }
1468
1469 static void domain_remove_dev_info(struct dmar_domain *domain)
1470 {
1471 struct device_domain_info *info;
1472 unsigned long flags;
1473
1474 spin_lock_irqsave(&device_domain_lock, flags);
1475 while (!list_empty(&domain->devices)) {
1476 info = list_entry(domain->devices.next,
1477 struct device_domain_info, link);
1478 list_del(&info->link);
1479 list_del(&info->global);
1480 if (info->dev)
1481 info->dev->dev.archdata.iommu = NULL;
1482 spin_unlock_irqrestore(&device_domain_lock, flags);
1483
1484 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1485 free_devinfo_mem(info);
1486
1487 spin_lock_irqsave(&device_domain_lock, flags);
1488 }
1489 spin_unlock_irqrestore(&device_domain_lock, flags);
1490 }
1491
1492 /*
1493 * find_domain
1494 * Note: struct pci_dev->dev.archdata.iommu stores the domain info
1495 */
1496 static struct dmar_domain *
1497 find_domain(struct pci_dev *pdev)
1498 {
1499 struct device_domain_info *info;
1500
1501 /* No lock here, assumes no domain exit in normal case */
1502 info = pdev->dev.archdata.iommu;
1503 if (info)
1504 return info->domain;
1505 return NULL;
1506 }
1507
1508 /* domain is initialized */
1509 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1510 {
1511 struct dmar_domain *domain, *found = NULL;
1512 struct intel_iommu *iommu;
1513 struct dmar_drhd_unit *drhd;
1514 struct device_domain_info *info, *tmp;
1515 struct pci_dev *dev_tmp;
1516 unsigned long flags;
1517 int bus = 0, devfn = 0;
1518
1519 domain = find_domain(pdev);
1520 if (domain)
1521 return domain;
1522
1523 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1524 if (dev_tmp) {
1525 if (dev_tmp->is_pcie) {
1526 bus = dev_tmp->subordinate->number;
1527 devfn = 0;
1528 } else {
1529 bus = dev_tmp->bus->number;
1530 devfn = dev_tmp->devfn;
1531 }
1532 spin_lock_irqsave(&device_domain_lock, flags);
1533 list_for_each_entry(info, &device_domain_list, global) {
1534 if (info->bus == bus && info->devfn == devfn) {
1535 found = info->domain;
1536 break;
1537 }
1538 }
1539 spin_unlock_irqrestore(&device_domain_lock, flags);
1540 /* the pcie-to-pci bridge already has a domain, use it */
1541 if (found) {
1542 domain = found;
1543 goto found_domain;
1544 }
1545 }
1546
1547 /* Allocate new domain for the device */
1548 drhd = dmar_find_matched_drhd_unit(pdev);
1549 if (!drhd) {
1550 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1551 pci_name(pdev));
1552 return NULL;
1553 }
1554 iommu = drhd->iommu;
1555
1556 domain = iommu_alloc_domain(iommu);
1557 if (!domain)
1558 goto error;
1559
1560 if (domain_init(domain, gaw)) {
1561 domain_exit(domain);
1562 goto error;
1563 }
1564
1565 /* register pcie-to-pci device */
1566 if (dev_tmp) {
1567 info = alloc_devinfo_mem();
1568 if (!info) {
1569 domain_exit(domain);
1570 goto error;
1571 }
1572 info->bus = bus;
1573 info->devfn = devfn;
1574 info->dev = NULL;
1575 info->domain = domain;
1576 /* This domain is shared by devices under p2p bridge */
1577 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1578
1579 /* the pcie-to-pci bridge already has a domain, use it */
1580 found = NULL;
1581 spin_lock_irqsave(&device_domain_lock, flags);
1582 list_for_each_entry(tmp, &device_domain_list, global) {
1583 if (tmp->bus == bus && tmp->devfn == devfn) {
1584 found = tmp->domain;
1585 break;
1586 }
1587 }
1588 if (found) {
1589 free_devinfo_mem(info);
1590 domain_exit(domain);
1591 domain = found;
1592 } else {
1593 list_add(&info->link, &domain->devices);
1594 list_add(&info->global, &device_domain_list);
1595 }
1596 spin_unlock_irqrestore(&device_domain_lock, flags);
1597 }
1598
1599 found_domain:
1600 info = alloc_devinfo_mem();
1601 if (!info)
1602 goto error;
1603 info->bus = pdev->bus->number;
1604 info->devfn = pdev->devfn;
1605 info->dev = pdev;
1606 info->domain = domain;
1607 spin_lock_irqsave(&device_domain_lock, flags);
1608 /* somebody is fast */
1609 found = find_domain(pdev);
1610 if (found != NULL) {
1611 spin_unlock_irqrestore(&device_domain_lock, flags);
1612 if (found != domain) {
1613 domain_exit(domain);
1614 domain = found;
1615 }
1616 free_devinfo_mem(info);
1617 return domain;
1618 }
1619 list_add(&info->link, &domain->devices);
1620 list_add(&info->global, &device_domain_list);
1621 pdev->dev.archdata.iommu = info;
1622 spin_unlock_irqrestore(&device_domain_lock, flags);
1623 return domain;
1624 error:
1625 /* recheck it here, maybe others set it */
1626 return find_domain(pdev);
1627 }
1628
1629 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1630 unsigned long long start,
1631 unsigned long long end)
1632 {
1633 struct dmar_domain *domain;
1634 unsigned long size;
1635 unsigned long long base;
1636 int ret;
1637
1638 printk(KERN_INFO
1639 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1640 pci_name(pdev), start, end);
1641 /* page table init */
1642 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1643 if (!domain)
1644 return -ENOMEM;
1645
1646 /* The address might not be aligned */
1647 base = start & PAGE_MASK;
1648 size = end - base;
1649 size = PAGE_ALIGN(size);
1650 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1651 IOVA_PFN(base + size) - 1)) {
1652 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1653 ret = -ENOMEM;
1654 goto error;
1655 }
1656
1657 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1658 size, base, pci_name(pdev));
1659 /*
1660 * RMRR range might have overlap with physical memory range,
1661 * clear it first
1662 */
1663 dma_pte_clear_range(domain, base, base + size);
1664
1665 ret = domain_page_mapping(domain, base, base, size,
1666 DMA_PTE_READ|DMA_PTE_WRITE);
1667 if (ret)
1668 goto error;
1669
1670 /* context entry init */
1671 ret = domain_context_mapping(domain, pdev);
1672 if (!ret)
1673 return 0;
1674 error:
1675 domain_exit(domain);
1676 return ret;
1677
1678 }
1679
1680 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1681 struct pci_dev *pdev)
1682 {
1683 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1684 return 0;
1685 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1686 rmrr->end_address + 1);
1687 }
1688
1689 #ifdef CONFIG_DMAR_GFX_WA
1690 struct iommu_prepare_data {
1691 struct pci_dev *pdev;
1692 int ret;
1693 };
1694
1695 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1696 unsigned long end_pfn, void *datax)
1697 {
1698 struct iommu_prepare_data *data;
1699
1700 data = (struct iommu_prepare_data *)datax;
1701
1702 data->ret = iommu_prepare_identity_map(data->pdev,
1703 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1704 return data->ret;
1705
1706 }
1707
1708 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1709 {
1710 int nid;
1711 struct iommu_prepare_data data;
1712
1713 data.pdev = pdev;
1714 data.ret = 0;
1715
1716 for_each_online_node(nid) {
1717 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1718 if (data.ret)
1719 return data.ret;
1720 }
1721 return data.ret;
1722 }
1723
1724 static void __init iommu_prepare_gfx_mapping(void)
1725 {
1726 struct pci_dev *pdev = NULL;
1727 int ret;
1728
1729 for_each_pci_dev(pdev) {
1730 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1731 !IS_GFX_DEVICE(pdev))
1732 continue;
1733 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1734 pci_name(pdev));
1735 ret = iommu_prepare_with_active_regions(pdev);
1736 if (ret)
1737 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1738 }
1739 }
1740 #else /* !CONFIG_DMAR_GFX_WA */
1741 static inline void iommu_prepare_gfx_mapping(void)
1742 {
1743 return;
1744 }
1745 #endif
1746
1747 #ifdef CONFIG_DMAR_FLOPPY_WA
1748 static inline void iommu_prepare_isa(void)
1749 {
1750 struct pci_dev *pdev;
1751 int ret;
1752
1753 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1754 if (!pdev)
1755 return;
1756
1757 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1758 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1759
1760 if (ret)
1761 printk("IOMMU: Failed to create 0-64M identity map, "
1762 "floppy might not work\n");
1763
1764 }
1765 #else
1766 static inline void iommu_prepare_isa(void)
1767 {
1768 return;
1769 }
1770 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1771
1772 static int __init init_dmars(void)
1773 {
1774 struct dmar_drhd_unit *drhd;
1775 struct dmar_rmrr_unit *rmrr;
1776 struct pci_dev *pdev;
1777 struct intel_iommu *iommu;
1778 int i, ret, unit = 0;
1779
1780 /*
1781 * for each drhd
1782 * allocate root
1783 * initialize and program root entry to not present
1784 * endfor
1785 */
1786 for_each_drhd_unit(drhd) {
1787 g_num_of_iommus++;
1788 /*
1789 * lock not needed as this is only incremented in the
1790 * single-threaded kernel __init code path; all other
1791 * accesses are read only
1792 */
1793 }
1794
1795 deferred_flush = kzalloc(g_num_of_iommus *
1796 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1797 if (!deferred_flush) {
1798 ret = -ENOMEM;
1799 goto error;
1800 }
1801
1802 for_each_drhd_unit(drhd) {
1803 if (drhd->ignored)
1804 continue;
1805
1806 iommu = drhd->iommu;
1807
1808 ret = iommu_init_domains(iommu);
1809 if (ret)
1810 goto error;
1811
1812 /*
1813 * TBD:
1814 * we could share the same root & context tables
1815 * among all IOMMUs. Need to split it later.
1816 */
1817 ret = iommu_alloc_root_entry(iommu);
1818 if (ret) {
1819 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1820 goto error;
1821 }
1822 }
1823
1824 for_each_drhd_unit(drhd) {
1825 if (drhd->ignored)
1826 continue;
1827
1828 iommu = drhd->iommu;
1829 if (dmar_enable_qi(iommu)) {
1830 /*
1831 * Queued Invalidate not enabled, use Register Based
1832 * Invalidate
1833 */
1834 iommu->flush.flush_context = __iommu_flush_context;
1835 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1836 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1837 "invalidation\n",
1838 (unsigned long long)drhd->reg_base_addr);
1839 } else {
1840 iommu->flush.flush_context = qi_flush_context;
1841 iommu->flush.flush_iotlb = qi_flush_iotlb;
1842 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1843 "invalidation\n",
1844 (unsigned long long)drhd->reg_base_addr);
1845 }
1846 }
1847
1848 /*
1849 * For each rmrr
1850 * for each dev attached to rmrr
1851 * do
1852 * locate drhd for dev, alloc domain for dev
1853 * allocate free domain
1854 * allocate page table entries for rmrr
1855 * if context not allocated for bus
1856 * allocate and init context
1857 * set present in root table for this bus
1858 * init context with domain, translation etc
1859 * endfor
1860 * endfor
1861 */
1862 for_each_rmrr_units(rmrr) {
1863 for (i = 0; i < rmrr->devices_cnt; i++) {
1864 pdev = rmrr->devices[i];
1865 /* some BIOSes list non-existent devices in the DMAR table */
1866 if (!pdev)
1867 continue;
1868 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1869 if (ret)
1870 printk(KERN_ERR
1871 "IOMMU: mapping reserved region failed\n");
1872 }
1873 }
1874
1875 iommu_prepare_gfx_mapping();
1876
1877 iommu_prepare_isa();
1878
1879 /*
1880 * for each drhd
1881 * enable fault log
1882 * global invalidate context cache
1883 * global invalidate iotlb
1884 * enable translation
1885 */
1886 for_each_drhd_unit(drhd) {
1887 if (drhd->ignored)
1888 continue;
1889 iommu = drhd->iommu;
1890 sprintf (iommu->name, "dmar%d", unit++);
1891
1892 iommu_flush_write_buffer(iommu);
1893
1894 ret = dmar_set_interrupt(iommu);
1895 if (ret)
1896 goto error;
1897
1898 iommu_set_root_entry(iommu);
1899
1900 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1901 0);
1902 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1903 0);
1904 iommu_disable_protect_mem_regions(iommu);
1905
1906 ret = iommu_enable_translation(iommu);
1907 if (ret)
1908 goto error;
1909 }
1910
1911 return 0;
1912 error:
1913 for_each_drhd_unit(drhd) {
1914 if (drhd->ignored)
1915 continue;
1916 iommu = drhd->iommu;
1917 free_iommu(iommu);
1918 }
1919 return ret;
1920 }
1921
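/*
 * Return the number of bytes, rounded up to whole pages, needed to
 * cover [host_addr, host_addr + size), taking the sub-page offset of
 * host_addr into account.
 */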
1922 static inline u64 aligned_size(u64 host_addr, size_t size)
1923 {
1924 u64 addr;
1925 addr = (host_addr & (~PAGE_MASK)) + size;
1926 return PAGE_ALIGN(addr);
1927 }
1928
1929 struct iova *
1930 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1931 {
1932 struct iova *piova;
1933
1934 /* Make sure it's in range */
1935 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1936 if (!size || (IOVA_START_ADDR + size > end))
1937 return NULL;
1938
1939 piova = alloc_iova(&domain->iovad,
1940 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1941 return piova;
1942 }
1943
1944 static struct iova *
1945 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1946 size_t size, u64 dma_mask)
1947 {
1948 struct pci_dev *pdev = to_pci_dev(dev);
1949 struct iova *iova = NULL;
1950
1951 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1952 iova = iommu_alloc_iova(domain, size, dma_mask);
1953 else {
1954 /*
1955 * First try to allocate an io virtual address in the
1956 * DMA_32BIT_MASK range and, if that fails, try allocating
1957 * from the higher range
1958 */
1959 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1960 if (!iova)
1961 iova = iommu_alloc_iova(domain, size, dma_mask);
1962 }
1963
1964 if (!iova) {
1965 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1966 return NULL;
1967 }
1968
1969 return iova;
1970 }
1971
1972 static struct dmar_domain *
1973 get_valid_domain_for_dev(struct pci_dev *pdev)
1974 {
1975 struct dmar_domain *domain;
1976 int ret;
1977
1978 domain = get_domain_for_dev(pdev,
1979 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1980 if (!domain) {
1981 printk(KERN_ERR
1982 "Allocating domain for %s failed", pci_name(pdev));
1983 return NULL;
1984 }
1985
1986 /* make sure context mapping is ok */
1987 if (unlikely(!domain_context_mapped(domain, pdev))) {
1988 ret = domain_context_mapping(domain, pdev);
1989 if (ret) {
1990 printk(KERN_ERR
1991 "Domain context map for %s failed",
1992 pci_name(pdev));
1993 return NULL;
1994 }
1995 }
1996
1997 return domain;
1998 }
1999
2000 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2001 size_t size, int dir, u64 dma_mask)
2002 {
2003 struct pci_dev *pdev = to_pci_dev(hwdev);
2004 struct dmar_domain *domain;
2005 phys_addr_t start_paddr;
2006 struct iova *iova;
2007 int prot = 0;
2008 int ret;
2009
2010 BUG_ON(dir == DMA_NONE);
2011 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2012 return paddr;
2013
2014 domain = get_valid_domain_for_dev(pdev);
2015 if (!domain)
2016 return 0;
2017
2018 size = aligned_size((u64)paddr, size);
2019
2020 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2021 if (!iova)
2022 goto error;
2023
2024 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2025
2026 /*
2027 * Check if DMAR supports zero-length reads on write only
2028 * mappings..
2029 */
2030 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2031 !cap_zlr(domain->iommu->cap))
2032 prot |= DMA_PTE_READ;
2033 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2034 prot |= DMA_PTE_WRITE;
2035 /*
2036 * paddr ~ (paddr + size) might span a partial page, so map the whole
2037 * page. Note: if two parts of one page are mapped separately, we
2038 * might end up with two guest addresses mapping to the same host
2039 * paddr, but this is not a big problem
2040 */
2041 ret = domain_page_mapping(domain, start_paddr,
2042 ((u64)paddr) & PAGE_MASK, size, prot);
2043 if (ret)
2044 goto error;
2045
2046 /* it's a non-present to present mapping */
2047 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
2048 start_paddr, size >> VTD_PAGE_SHIFT, 1);
2049 if (ret)
2050 iommu_flush_write_buffer(domain->iommu);
2051
2052 return start_paddr + ((u64)paddr & (~PAGE_MASK));
2053
2054 error:
2055 if (iova)
2056 __free_iova(&domain->iovad, iova);
2057 	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2058 pci_name(pdev), size, (unsigned long long)paddr, dir);
2059 return 0;
2060 }
2061
2062 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2063 size_t size, int dir)
2064 {
2065 return __intel_map_single(hwdev, paddr, size, dir,
2066 to_pci_dev(hwdev)->dma_mask);
2067 }
2068
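/*
 * Flush every IOMMU's IOTLB and release all IOVAs queued for deferred
 * unmapping.  Callers must hold async_umap_flush_lock.
 */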
2069 static void flush_unmaps(void)
2070 {
2071 int i, j;
2072
2073 timer_on = 0;
2074
2075 /* just flush them all */
2076 for (i = 0; i < g_num_of_iommus; i++) {
2077 if (deferred_flush[i].next) {
2078 struct intel_iommu *iommu =
2079 deferred_flush[i].domain[0]->iommu;
2080
2081 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2082 DMA_TLB_GLOBAL_FLUSH, 0);
2083 for (j = 0; j < deferred_flush[i].next; j++) {
2084 __free_iova(&deferred_flush[i].domain[j]->iovad,
2085 deferred_flush[i].iova[j]);
2086 }
2087 deferred_flush[i].next = 0;
2088 }
2089 }
2090
2091 list_size = 0;
2092 }
2093
2094 static void flush_unmaps_timeout(unsigned long data)
2095 {
2096 unsigned long flags;
2097
2098 spin_lock_irqsave(&async_umap_flush_lock, flags);
2099 flush_unmaps();
2100 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2101 }
2102
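/*
 * Queue an IOVA for deferred freeing; the IOTLB is flushed either when
 * the high-water mark is reached or when the unmap timer fires.
 */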
2103 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2104 {
2105 unsigned long flags;
2106 int next, iommu_id;
2107
2108 spin_lock_irqsave(&async_umap_flush_lock, flags);
2109 if (list_size == HIGH_WATER_MARK)
2110 flush_unmaps();
2111
2112 iommu_id = dom->iommu->seq_id;
2113
2114 next = deferred_flush[iommu_id].next;
2115 deferred_flush[iommu_id].domain[next] = dom;
2116 deferred_flush[iommu_id].iova[next] = iova;
2117 deferred_flush[iommu_id].next++;
2118
2119 if (!timer_on) {
2120 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2121 timer_on = 1;
2122 }
2123 list_size++;
2124 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2125 }
2126
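/*
 * Tear down a single-buffer mapping: clear the PTEs, free the page
 * tables and either flush the IOTLB immediately (strict mode) or defer
 * the flush and IOVA release.
 */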
2127 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2128 int dir)
2129 {
2130 struct pci_dev *pdev = to_pci_dev(dev);
2131 struct dmar_domain *domain;
2132 unsigned long start_addr;
2133 struct iova *iova;
2134
2135 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2136 return;
2137 domain = find_domain(pdev);
2138 BUG_ON(!domain);
2139
2140 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2141 if (!iova)
2142 return;
2143
2144 start_addr = iova->pfn_lo << PAGE_SHIFT;
2145 size = aligned_size((u64)dev_addr, size);
2146
2147 	pr_debug("Device %s unmapping: %zx@%llx\n",
2148 pci_name(pdev), size, (unsigned long long)start_addr);
2149
2150 /* clear the whole page */
2151 dma_pte_clear_range(domain, start_addr, start_addr + size);
2152 /* free page tables */
2153 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2154 if (intel_iommu_strict) {
2155 if (iommu_flush_iotlb_psi(domain->iommu,
2156 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2157 iommu_flush_write_buffer(domain->iommu);
2158 /* free iova */
2159 __free_iova(&domain->iovad, iova);
2160 } else {
2161 add_unmap(domain, iova);
2162 /*
2163 		 * The release of the mapping is queued up to save the roughly
2164 		 * 1/6th of CPU time otherwise spent on a synchronous IOTLB flush.
2165 */
2166 }
2167 }
2168
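/*
 * Allocate zeroed, page-aligned memory and map it for bidirectional DMA
 * using the device's coherent DMA mask.
 */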
2169 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2170 dma_addr_t *dma_handle, gfp_t flags)
2171 {
2172 void *vaddr;
2173 int order;
2174
2175 size = PAGE_ALIGN(size);
2176 order = get_order(size);
2177 flags &= ~(GFP_DMA | GFP_DMA32);
2178
2179 vaddr = (void *)__get_free_pages(flags, order);
2180 if (!vaddr)
2181 return NULL;
2182 memset(vaddr, 0, size);
2183
2184 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2185 DMA_BIDIRECTIONAL,
2186 hwdev->coherent_dma_mask);
2187 if (*dma_handle)
2188 return vaddr;
2189 free_pages((unsigned long)vaddr, order);
2190 return NULL;
2191 }
2192
2193 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2194 dma_addr_t dma_handle)
2195 {
2196 int order;
2197
2198 size = PAGE_ALIGN(size);
2199 order = get_order(size);
2200
2201 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2202 free_pages((unsigned long)vaddr, order);
2203 }
2204
2205 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2206
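/*
 * Unmap a scatterlist previously mapped by intel_map_sg(): compute the
 * total mapped size, clear the PTEs, free the page tables and release
 * the IOVA range.
 */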
2207 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2208 int nelems, int dir)
2209 {
2210 int i;
2211 struct pci_dev *pdev = to_pci_dev(hwdev);
2212 struct dmar_domain *domain;
2213 unsigned long start_addr;
2214 struct iova *iova;
2215 size_t size = 0;
2216 void *addr;
2217 struct scatterlist *sg;
2218
2219 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2220 return;
2221
2222 domain = find_domain(pdev);
2223
2224 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2225 if (!iova)
2226 return;
2227 for_each_sg(sglist, sg, nelems, i) {
2228 addr = SG_ENT_VIRT_ADDRESS(sg);
2229 size += aligned_size((u64)addr, sg->length);
2230 }
2231
2232 start_addr = iova->pfn_lo << PAGE_SHIFT;
2233
2234 /* clear the whole page */
2235 dma_pte_clear_range(domain, start_addr, start_addr + size);
2236 /* free page tables */
2237 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2238
2239 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2240 size >> VTD_PAGE_SHIFT, 0))
2241 iommu_flush_write_buffer(domain->iommu);
2242
2243 /* free iova */
2244 __free_iova(&domain->iovad, iova);
2245 }
2246
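/*
 * Identity path for devices that bypass the IOMMU: no translation is set
 * up, each element's dma_address is simply its bus address.
 */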
2247 static int intel_nontranslate_map_sg(struct device *hddev,
2248 struct scatterlist *sglist, int nelems, int dir)
2249 {
2250 int i;
2251 struct scatterlist *sg;
2252
2253 for_each_sg(sglist, sg, nelems, i) {
2254 BUG_ON(!sg_page(sg));
2255 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2256 sg->dma_length = sg->length;
2257 }
2258 return nelems;
2259 }
2260
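/*
 * Map a scatterlist into one contiguous IOVA range; devices marked with
 * DUMMY_DEVICE_DOMAIN_INFO take the non-translated path instead.
 */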
2261 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2262 int dir)
2263 {
2264 void *addr;
2265 int i;
2266 struct pci_dev *pdev = to_pci_dev(hwdev);
2267 struct dmar_domain *domain;
2268 size_t size = 0;
2269 int prot = 0;
2270 size_t offset = 0;
2271 struct iova *iova = NULL;
2272 int ret;
2273 struct scatterlist *sg;
2274 unsigned long start_addr;
2275
2276 BUG_ON(dir == DMA_NONE);
2277 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2278 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2279
2280 domain = get_valid_domain_for_dev(pdev);
2281 if (!domain)
2282 return 0;
2283
2284 for_each_sg(sglist, sg, nelems, i) {
2285 addr = SG_ENT_VIRT_ADDRESS(sg);
2286 addr = (void *)virt_to_phys(addr);
2287 size += aligned_size((u64)addr, sg->length);
2288 }
2289
2290 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2291 if (!iova) {
2292 sglist->dma_length = 0;
2293 return 0;
2294 }
2295
2296 /*
2297 * Check if DMAR supports zero-length reads on write only
2298 	 * mappings.
2299 	 */
2300 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2301 !cap_zlr(domain->iommu->cap))
2302 prot |= DMA_PTE_READ;
2303 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2304 prot |= DMA_PTE_WRITE;
2305
2306 start_addr = iova->pfn_lo << PAGE_SHIFT;
2307 offset = 0;
2308 for_each_sg(sglist, sg, nelems, i) {
2309 addr = SG_ENT_VIRT_ADDRESS(sg);
2310 addr = (void *)virt_to_phys(addr);
2311 size = aligned_size((u64)addr, sg->length);
2312 ret = domain_page_mapping(domain, start_addr + offset,
2313 ((u64)addr) & PAGE_MASK,
2314 size, prot);
2315 if (ret) {
2316 /* clear the page */
2317 dma_pte_clear_range(domain, start_addr,
2318 start_addr + offset);
2319 /* free page tables */
2320 dma_pte_free_pagetable(domain, start_addr,
2321 start_addr + offset);
2322 /* free iova */
2323 __free_iova(&domain->iovad, iova);
2324 return 0;
2325 }
2326 sg->dma_address = start_addr + offset +
2327 ((u64)addr & (~PAGE_MASK));
2328 sg->dma_length = sg->length;
2329 offset += size;
2330 }
2331
2332 /* it's a non-present to present mapping */
2333 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2334 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2335 iommu_flush_write_buffer(domain->iommu);
2336 return nelems;
2337 }
2338
2339 static struct dma_mapping_ops intel_dma_ops = {
2340 .alloc_coherent = intel_alloc_coherent,
2341 .free_coherent = intel_free_coherent,
2342 .map_single = intel_map_single,
2343 .unmap_single = intel_unmap_single,
2344 .map_sg = intel_map_sg,
2345 .unmap_sg = intel_unmap_sg,
2346 };
2347
2348 static inline int iommu_domain_cache_init(void)
2349 {
2350 int ret = 0;
2351
2352 iommu_domain_cache = kmem_cache_create("iommu_domain",
2353 sizeof(struct dmar_domain),
2354 0,
2355 SLAB_HWCACHE_ALIGN,
2357 NULL);
2358 if (!iommu_domain_cache) {
2359 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2360 ret = -ENOMEM;
2361 }
2362
2363 return ret;
2364 }
2365
2366 static inline int iommu_devinfo_cache_init(void)
2367 {
2368 int ret = 0;
2369
2370 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2371 sizeof(struct device_domain_info),
2372 0,
2373 SLAB_HWCACHE_ALIGN,
2374 NULL);
2375 if (!iommu_devinfo_cache) {
2376 printk(KERN_ERR "Couldn't create devinfo cache\n");
2377 ret = -ENOMEM;
2378 }
2379
2380 return ret;
2381 }
2382
2383 static inline int iommu_iova_cache_init(void)
2384 {
2385 int ret = 0;
2386
2387 iommu_iova_cache = kmem_cache_create("iommu_iova",
2388 sizeof(struct iova),
2389 0,
2390 SLAB_HWCACHE_ALIGN,
2391 NULL);
2392 if (!iommu_iova_cache) {
2393 printk(KERN_ERR "Couldn't create iova cache\n");
2394 ret = -ENOMEM;
2395 }
2396
2397 return ret;
2398 }
2399
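/*
 * Create the slab caches for iova, dmar_domain and device_domain_info
 * objects, tearing down any already-created caches on failure.
 */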
2400 static int __init iommu_init_mempool(void)
2401 {
2402 int ret;
2403 ret = iommu_iova_cache_init();
2404 if (ret)
2405 return ret;
2406
2407 ret = iommu_domain_cache_init();
2408 if (ret)
2409 goto domain_error;
2410
2411 ret = iommu_devinfo_cache_init();
2412 if (!ret)
2413 return ret;
2414
2415 kmem_cache_destroy(iommu_domain_cache);
2416 domain_error:
2417 kmem_cache_destroy(iommu_iova_cache);
2418
2419 return -ENOMEM;
2420 }
2421
2422 static void __init iommu_exit_mempool(void)
2423 {
2424 kmem_cache_destroy(iommu_devinfo_cache);
2425 kmem_cache_destroy(iommu_domain_cache);
2426 kmem_cache_destroy(iommu_iova_cache);
2427
2428 }
2429
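/*
 * Mark DRHD units that cover no PCI devices as ignored; if graphics
 * mapping is disabled, also bypass units that serve only gfx devices.
 */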
2430 static void __init init_no_remapping_devices(void)
2431 {
2432 struct dmar_drhd_unit *drhd;
2433
2434 for_each_drhd_unit(drhd) {
2435 if (!drhd->include_all) {
2436 int i;
2437 for (i = 0; i < drhd->devices_cnt; i++)
2438 if (drhd->devices[i] != NULL)
2439 break;
2440 /* ignore DMAR unit if no pci devices exist */
2441 if (i == drhd->devices_cnt)
2442 drhd->ignored = 1;
2443 }
2444 }
2445
2446 if (dmar_map_gfx)
2447 return;
2448
2449 for_each_drhd_unit(drhd) {
2450 int i;
2451 if (drhd->ignored || drhd->include_all)
2452 continue;
2453
2454 for (i = 0; i < drhd->devices_cnt; i++)
2455 if (drhd->devices[i] &&
2456 !IS_GFX_DEVICE(drhd->devices[i]))
2457 break;
2458
2459 if (i < drhd->devices_cnt)
2460 continue;
2461
2462 /* bypass IOMMU if it is just for gfx devices */
2463 drhd->ignored = 1;
2464 for (i = 0; i < drhd->devices_cnt; i++) {
2465 if (!drhd->devices[i])
2466 continue;
2467 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2468 }
2469 }
2470 }
2471
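/*
 * Main init entry point: parse the DMAR table and device scopes, set up
 * the DMA-remapping hardware and install intel_dma_ops, unless the IOMMU
 * is disabled or swiotlb is in use.
 */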
2472 int __init intel_iommu_init(void)
2473 {
2474 int ret = 0;
2475
2476 if (dmar_table_init())
2477 return -ENODEV;
2478
2479 if (dmar_dev_scope_init())
2480 return -ENODEV;
2481
2482 /*
2483 * Check the need for DMA-remapping initialization now.
2484 	 * The initialization above is also used by interrupt remapping.
2485 */
2486 if (no_iommu || swiotlb || dmar_disabled)
2487 return -ENODEV;
2488
2489 iommu_init_mempool();
2490 dmar_init_reserved_ranges();
2491
2492 init_no_remapping_devices();
2493
2494 ret = init_dmars();
2495 if (ret) {
2496 printk(KERN_ERR "IOMMU: dmar init failed\n");
2497 put_iova_domain(&reserved_iova_list);
2498 iommu_exit_mempool();
2499 return ret;
2500 }
2501 printk(KERN_INFO
2502 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2503
2504 init_timer(&unmap_timer);
2505 force_iommu = 1;
2506 dma_ops = &intel_dma_ops;
2507 return 0;
2508 }
2509
2510 void intel_iommu_domain_exit(struct dmar_domain *domain)
2511 {
2512 u64 end;
2513
2514 	/* Domain 0 is reserved, so don't process it */
2515 if (!domain)
2516 return;
2517
2518 end = DOMAIN_MAX_ADDR(domain->gaw);
2519 end = end & (~VTD_PAGE_MASK);
2520
2521 /* clear ptes */
2522 dma_pte_clear_range(domain, 0, end);
2523
2524 /* free page tables */
2525 dma_pte_free_pagetable(domain, 0, end);
2526
2527 iommu_free_domain(domain);
2528 free_domain_mem(domain);
2529 }
2530 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2531
2532 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2533 {
2534 struct dmar_drhd_unit *drhd;
2535 struct dmar_domain *domain;
2536 struct intel_iommu *iommu;
2537
2538 drhd = dmar_find_matched_drhd_unit(pdev);
2539 if (!drhd) {
2540 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2541 return NULL;
2542 }
2543
2544 iommu = drhd->iommu;
2545 if (!iommu) {
2546 printk(KERN_ERR
2547 "intel_iommu_domain_alloc: iommu == NULL\n");
2548 return NULL;
2549 }
2550 domain = iommu_alloc_domain(iommu);
2551 if (!domain) {
2552 printk(KERN_ERR
2553 "intel_iommu_domain_alloc: domain == NULL\n");
2554 return NULL;
2555 }
2556 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2557 printk(KERN_ERR
2558 "intel_iommu_domain_alloc: domain_init() failed\n");
2559 intel_iommu_domain_exit(domain);
2560 return NULL;
2561 }
2562 return domain;
2563 }
2564 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2565
2566 int intel_iommu_context_mapping(
2567 struct dmar_domain *domain, struct pci_dev *pdev)
2568 {
2569 int rc;
2570 rc = domain_context_mapping(domain, pdev);
2571 return rc;
2572 }
2573 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2574
2575 int intel_iommu_page_mapping(
2576 struct dmar_domain *domain, dma_addr_t iova,
2577 u64 hpa, size_t size, int prot)
2578 {
2579 int rc;
2580 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2581 return rc;
2582 }
2583 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2584
2585 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2586 {
2587 detach_domain_for_dev(domain, bus, devfn);
2588 }
2589 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2590
2591 struct dmar_domain *
2592 intel_iommu_find_domain(struct pci_dev *pdev)
2593 {
2594 return find_domain(pdev);
2595 }
2596 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2597
2598 int intel_iommu_found(void)
2599 {
2600 return g_num_of_iommus;
2601 }
2602 EXPORT_SYMBOL_GPL(intel_iommu_found);
2603
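/*
 * Walk the domain's page table and return the host page frame number
 * backing @iova, or 0 if no translation is present.
 */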
2604 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2605 {
2606 struct dma_pte *pte;
2607 u64 pfn;
2608
2609 pfn = 0;
2610 pte = addr_to_dma_pte(domain, iova);
2611
2612 if (pte)
2613 pfn = dma_pte_addr(pte);
2614
2615 return pfn >> VTD_PAGE_SHIFT;
2616 }
2617 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);