/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}

static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}

static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

#define context_present(c) ((c).lo & 1)
#define context_fault_disable(c) (((c).lo >> 1) & 1)
#define context_translation_type(c) (((c).lo >> 2) & 3)
#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
#define context_address_width(c) ((c).hi & 7)
#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))

#define context_set_present(c) do {(c).lo |= 1;} while (0)
#define context_set_fault_enable(c) \
	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
#define context_set_translation_type(c, val) \
	do { \
		(c).lo &= (((u64)-1) << 4) | 3; \
		(c).lo |= ((val) & 3) << 2; \
	} while (0)
#define CONTEXT_TT_MULTI_LEVEL	0
#define context_set_address_root(c, val) \
	do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};
#define dma_clear_pte(p)	do {(p).val = 0;} while (0)

#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
#define dma_set_pte_prot(p, prot) \
		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
#define dma_set_pte_addr(p, addr) do {\
		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
struct dmar_domain {
	int	id;			/* domain id */
	struct intel_iommu *iommu;	/* back pointer to owning iommu */

	struct list_head devices; 	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	spinlock_t	mapping_lock;	/* page table lock */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
	int		flags;
};
static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
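
/*
 * Kernel command line handling: "intel_iommu=" accepts a comma-separated
 * combination of "off", "igfx_off", "forcedac" and "strict"; each option
 * simply flips one of the flags above.
 */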
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}
static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn], \
			sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry)
		goto out;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
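
/*
 * Walk the multi-level page table for a DMA address, allocating any
 * missing intermediate page-table pages on the way down to the leaf PTE.
 */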
static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;
	unsigned long flags;

	BUG_ON(!domain->pgd);

	addr &= (((u64)1) << addr_width) - 1;
	parent = domain->pgd;

	spin_lock_irqsave(&domain->mapping_lock, flags);
	while (level > 0) {
		void *tmp_page;

		offset = address_level_offset(addr, level);
		pte = &parent[offset];
		if (level == 1)
			break;

		if (!dma_pte_present(*pte)) {
			tmp_page = alloc_pgtable_page();
			if (!tmp_page) {
				spin_unlock_irqrestore(&domain->mapping_lock,
					flags);
				return NULL;
			}
			__iommu_flush_cache(domain->iommu, tmp_page,
					PAGE_SIZE);
			dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
			/*
			 * high level table always sets r/w, last level page
			 * table control read/write
			 */
			dma_set_pte_readable(*pte);
			dma_set_pte_writable(*pte);
			__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(*pte));
		level--;
	}

	spin_unlock_irqrestore(&domain->mapping_lock, flags);
	return pte;
}
/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
		int level)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = address_level_offset(addr, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(*pte))
			break;
		parent = phys_to_virt(dma_pte_addr(*pte));
		total--;
	}
	return NULL;
}
/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
	struct dma_pte *pte = NULL;

	/* get last level pte */
	pte = dma_addr_level_pte(domain, addr, 1);

	if (pte) {
		dma_clear_pte(*pte);
		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
	}
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;
	/* in case it's partial page */
	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;

	/* we don't need lock here, nobody else touches the iova range */
	while (start < end) {
		dma_pte_clear_one(domain, start);
		start += VTD_PAGE_SIZE;
	}
}
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
	u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	u64 tmp;

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;

	/* we don't need lock here, nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start, level);
		if (tmp >= end || (tmp + level_size(level) > end))
			return;

		while (tmp < end) {
			pte = dma_addr_level_pte(domain, tmp, level);
			if (pte) {
				free_pgtable_page(
					phys_to_virt(dma_pte_addr(*pte)));
				dma_clear_pte(*pte);
				__iommu_flush_cache(domain->iommu,
						pte, sizeof(*pte));
			}
			tmp += level_size(level);
		}
		level++;
	}
	/* free pgd */
	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page();
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 cmd, sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	cmd = iommu->gcmd | DMA_GCMD_SRTP;
	writel(cmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!cap_rwbf(iommu->cap))
		return;
	val = iommu->gcmd | DMA_GCMD_WBF;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
			readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determine if we need a write buffer flush */
static int __iommu_flush_context(struct intel_iommu *iommu,
	u16 did, u16 source_id, u8 function_mask, u64 type,
	int non_present_entry_flush)
{
	u64 val = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entry we do nothing and if hardware cache non-present
	 * entry, we flush entries of domain 0 (the domain id is used to cache
	 * any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* flush context entry will implicitly flush write buffer */
	return 0;
}
/* return value determine if we need a write buffer flush */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entry we do nothing and if hardware cache non-present
	 * entry, we flush entries of domain 0 (the domain id is used to cache
	 * any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
	/* flush iotlb entry will implicitly flush write buffer */
	return 0;
}
static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int pages, int non_present_entry_flush)
{
	unsigned int mask;

	BUG_ON(addr & (~VTD_PAGE_MASK));
	BUG_ON(pages == 0);

	/* Fallback to domain selective flush if no PSI support */
	if (!cap_pgsel_inv(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH,
						non_present_entry_flush);

	/*
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	mask = ilog2(__roundup_pow_of_two(pages));
	/* Fallback to domain selective flush if size is too big */
	if (mask > cap_max_amask_val(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
			DMA_TLB_DSI_FLUSH, non_present_entry_flush);

	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
					DMA_TLB_PSI_FLUSH,
					non_present_entry_flush);
}
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_TES), sts);

	iommu->gcmd |= DMA_GCMD_TE;
	spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}
static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}
/* iommu interrupt handling. Most stuff are MSI-like. */

static const char *fault_reason_strings[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
};
#define MAX_FAULT_REASON_IDX	(ARRAY_SIZE(fault_reason_strings) - 1)

const char *dmar_get_fault_reason(u8 fault_reason)
{
	if (fault_reason > MAX_FAULT_REASON_IDX)
		return "Unknown";
	else
		return fault_reason_strings[fault_reason];
}
void dmar_msi_unmask(unsigned int irq)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(unsigned int irq)
{
	unsigned long flag;
	struct intel_iommu *iommu = get_irq_data(irq);

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
		u8 fault_reason, u16 source_id, unsigned long long addr)
{
	const char *reason;

	reason = dmar_get_fault_reason(fault_reason);

	printk(KERN_ERR
		"DMAR:[%s] Request device [%02x:%02x.%d] "
		"fault addr %llx \n"
		"DMAR:[fault reason %02d] %s\n",
		(type ? "DMA Read" : "DMA Write"),
		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
	return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
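/*
 * Primary fault handler: walk the fault recording registers, report each
 * pending fault and clear it, then acknowledge any primary fault overflow.
 */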
static irqreturn_t iommu_page_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto clear_overflow;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		int type;
		u32 data;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		fault_reason = dma_frcd_fault_reason(data);
		type = dma_frcd_type(data);

		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 8);
		source_id = dma_frcd_source_id(data);

		guest_addr = dmar_readq(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN);
		guest_addr = dma_frcd_page_addr(guest_addr);
		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		spin_unlock_irqrestore(&iommu->register_lock, flag);

		iommu_page_fault_do_one(iommu, type, fault_reason,
				source_id, guest_addr);

		fault_index++;
		if (fault_index > cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		spin_lock_irqsave(&iommu->register_lock, flag);
	}
clear_overflow:
	/* clear primary fault overflow */
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status & DMA_FSTS_PFO)
		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}
int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	irq = create_irq();
	if (!irq) {
		printk(KERN_ERR "IOMMU: no free vectors\n");
		return -EINVAL;
	}

	set_irq_data(irq, iommu);
	iommu->irq = irq;

	ret = arch_setup_dmar_msi(irq);
	if (ret) {
		set_irq_data(irq, NULL);
		iommu->irq = 0;
		destroy_irq(irq);
		return 0;
	}

	/* Force fault register is cleared */
	iommu_page_fault(irq, iommu);

	ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
	if (ret)
		printk(KERN_ERR "IOMMU: can't request irq\n");
	return ret;
}
static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("Number of Domains supported <%ld>\n", ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
			GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		kfree(iommu->domain_ids);
		return -ENOMEM;
	}

	spin_lock_init(&iommu->lock);

	/*
	 * if Caching mode is set, then invalid translations are tagged
	 * with domainid 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}
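
/*
 * Tear down an IOMMU: release every domain still bound to it, disable
 * translation, free its interrupt and its root/context tables.
 */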
static void domain_exit(struct dmar_domain *domain);

void free_dmar_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;

	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
	for (; i < cap_ndoms(iommu->cap); ) {
		domain = iommu->domains[i];
		clear_bit(i, iommu->domain_ids);
		domain_exit(domain);
		i = find_next_bit(iommu->domain_ids,
			cap_ndoms(iommu->cap), i+1);
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		set_irq_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	/* free context mapping */
	free_context_table(iommu);
}
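
/*
 * Allocate a dmar_domain and claim a free domain id from this IOMMU's
 * domain-id bitmap.
 */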
static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	unsigned long ndomains;
	int num;
	unsigned long flags;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);
	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		free_domain_mem(domain);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return NULL;
	}

	set_bit(num, iommu->domain_ids);
	domain->id = num;
	domain->iommu = iommu;
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return domain;
}
static void iommu_free_domain(struct dmar_domain *domain)
{
	unsigned long flags;

	spin_lock_irqsave(&domain->iommu->lock, flags);
	clear_bit(domain->id, domain->iommu->domain_ids);
	spin_unlock_irqrestore(&domain->iommu->lock, flags);
}
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_alloc_key;
static struct lock_class_key reserved_rbtree_key;

static void dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;
	u64 addr, size;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
		&reserved_alloc_key);
	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova)
		printk(KERN_ERR "Reserve IOAPIC range failed\n");

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			addr = r->start;
			addr &= PAGE_MASK;
			size = r->end - addr;
			size = PAGE_ALIGN(size);
			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
				IOVA_PFN(size + addr) - 1);
			if (!iova)
				printk(KERN_ERR "Reserve iova failed\n");
		}
	}
}
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->mapping_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain->iommu;
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}
static void domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so dont process it */
	if (!domain)
		return;

	domain_remove_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);
	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}
static int domain_context_mapping_one(struct dmar_domain *domain,
		u8 bus, u8 devfn)
{
	struct context_entry *context;
	struct intel_iommu *iommu = domain->iommu;
	unsigned long flags;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	BUG_ON(!domain->pgd);
	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(*context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	context_set_domain_id(*context, domain->id);
	context_set_address_width(*context, domain->agaw);
	context_set_address_root(*context, virt_to_phys(domain->pgd));
	context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(*context);
	context_set_present(*context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	/* it's a non-present to present mapping */
	if (iommu->flush.flush_context(iommu, domain->id,
		(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
		DMA_CCMD_DEVICE_INVL, 1))
		iommu_flush_write_buffer(iommu);
	else
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
	return 0;
}
static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pdev->bus->number,
		pdev->devfn);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain, parent->bus->number,
			parent->devfn);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->subordinate->number, 0);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->bus->number, tmp->devfn);
}
static int domain_context_mapped(struct dmar_domain *domain,
	struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = device_context_mapped(domain->iommu,
		pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(domain->iommu, parent->bus->number,
			parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie)
		return device_context_mapped(domain->iommu,
			tmp->subordinate->number, 0);
	else
		return device_context_mapped(domain->iommu,
			tmp->bus->number, tmp->devfn);
}
static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
			u64 hpa, size_t size, int prot)
{
	u64 start_pfn, end_pfn;
	struct dma_pte *pte;
	int index;
	int addr_width = agaw_to_width(domain->agaw);

	hpa &= (((u64)1) << addr_width) - 1;

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;
	iova &= PAGE_MASK;
	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
	index = 0;
	while (start_pfn < end_pfn) {
		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
		if (!pte)
			return -ENOMEM;
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		BUG_ON(dma_pte_addr(*pte));
		dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
		dma_set_pte_prot(*pte, prot);
		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
		start_pfn++;
		index++;
	}
	return 0;
}
static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
	clear_context_table(domain->iommu, bus, devfn);
	domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL, 0);
	domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH, 0);
}
static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;
		spin_unlock_irqrestore(&device_domain_lock, flags);

		detach_domain_for_dev(info->domain, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
/*
 * find_domain
 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		if (dev_tmp->is_pcie) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	/* Allocate new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
			pci_name(pdev));
		return NULL;
	}
	iommu = drhd->iommu;

	domain = iommu_alloc_domain(iommu);
	if (!domain)
		goto error;

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, uses it */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}
static int iommu_prepare_identity_map(struct pci_dev *pdev,
				      unsigned long long start,
				      unsigned long long end)
{
	struct dmar_domain *domain;
	unsigned long size;
	unsigned long long base;
	int ret;

	printk(KERN_INFO
		"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
		pci_name(pdev), start, end);
	/* page table init */
	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* The address might not be aligned */
	base = start & PAGE_MASK;
	size = end - base;
	size = PAGE_ALIGN(size);
	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
			IOVA_PFN(base + size) - 1)) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		ret = -ENOMEM;
		goto error;
	}

	pr_debug("Mapping reserved region %lx@%llx for %s\n",
		size, base, pci_name(pdev));
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, base, base + size);

	ret = domain_page_mapping(domain, base, base, size,
		DMA_PTE_READ|DMA_PTE_WRITE);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, pdev);
	if (!ret)
		return 0;
error:
	domain_exit(domain);
	return ret;
}
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
	struct pci_dev *pdev)
{
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(pdev, rmrr->base_address,
		rmrr->end_address + 1);
}
#ifdef CONFIG_DMAR_GFX_WA
struct iommu_prepare_data {
	struct pci_dev *pdev;
	int ret;
};

static int __init iommu_prepare_work_fn(unsigned long start_pfn,
					unsigned long end_pfn, void *datax)
{
	struct iommu_prepare_data *data;

	data = (struct iommu_prepare_data *)datax;

	data->ret = iommu_prepare_identity_map(data->pdev,
				start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	return data->ret;
}

static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
{
	int nid;
	struct iommu_prepare_data data;

	data.pdev = pdev;
	data.ret = 0;

	for_each_online_node(nid) {
		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
		if (data.ret)
			return data.ret;
	}
	return data.ret;
}

static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		ret = iommu_prepare_with_active_regions(pdev);
		if (ret)
			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
	}
}
#else /* !CONFIG_DMAR_GFX_WA */
static inline void iommu_prepare_gfx_mapping(void)
{
	return;
}
#endif
#ifdef CONFIG_DMAR_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	if (ret)
		printk("IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");
}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLPY_WA */
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct pci_dev *pdev;
	struct intel_iommu *iommu;
	int i, ret, unit = 0;

	/*
	 * for each drhd
	 *    allocate root
	 *    initialize and program root entry to not present
	 * endfor
	 */
	for_each_drhd_unit(drhd) {
		g_num_of_iommus++;
		/*
		 * lock not needed as this is only incremented in the single
		 * threaded kernel __init code path all other access are read
		 * only
		 */
	}

	deferred_flush = kzalloc(g_num_of_iommus *
		sizeof(struct deferred_flush_tables), GFP_KERNEL);
	if (!deferred_flush) {
		ret = -ENOMEM;
		goto error;
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		ret = iommu_init_domains(iommu);
		if (ret)
			goto error;

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * amoung all IOMMU's. Need to Split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto error;
		}
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;
		if (dmar_enable_qi(iommu)) {
			/*
			 * Queued Invalidate not enabled, use Register Based
			 * Invalidate
			 */
			iommu->flush.flush_context = __iommu_flush_context;
			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
			printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
			       "invalidation\n",
			       (unsigned long long)drhd->reg_base_addr);
		} else {
			iommu->flush.flush_context = qi_flush_context;
			iommu->flush.flush_iotlb = qi_flush_iotlb;
			printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
			       "invalidation\n",
			       (unsigned long long)drhd->reg_base_addr);
		}
	}

	/*
	 * For each rmrr
	 *   for each dev attached to rmrr
	 *   do
	 *     locate drhd for dev, alloc domain for dev
	 *     allocate free domain
	 *     allocate page table entries for rmrr
	 *     if context not allocated for bus
	 *           allocate and init context
	 *           set present in root table for this bus
	 *     init context with domain, translation etc
	 *    endfor
	 * endfor
	 */
	for_each_rmrr_units(rmrr) {
		for (i = 0; i < rmrr->devices_cnt; i++) {
			pdev = rmrr->devices[i];
			/* some BIOS lists non-exist devices in DMAR table */
			if (!pdev)
				continue;
			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
			if (ret)
				printk(KERN_ERR
				       "IOMMU: mapping reserved region failed\n");
		}
	}

	iommu_prepare_gfx_mapping();

	iommu_prepare_isa();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		sprintf(iommu->name, "dmar%d", unit++);

		iommu_flush_write_buffer(iommu);

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto error;

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
					   0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
					 0);
		iommu_disable_protect_mem_regions(iommu);

		ret = iommu_enable_translation(iommu);
		if (ret)
			goto error;
	}

	return 0;
error:
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		free_iommu(iommu);
	}
	return ret;
}
static inline u64 aligned_size(u64 host_addr, size_t size)
{
	u64 addr;
	addr = (host_addr & (~PAGE_MASK)) + size;
	return PAGE_ALIGN(addr);
}
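
/*
 * IOVA allocation policy: devices limited to 32-bit DMA (or booted with
 * "forcedac") allocate directly against their mask; otherwise try below
 * 4GB first and only then fall back to the device's full mask.
 */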
static struct iova *
iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
{
	struct iova *piova;

	/* Make sure it's in range */
	end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
	if (!size || (IOVA_START_ADDR + size > end))
		return NULL;

	piova = alloc_iova(&domain->iovad,
			size >> PAGE_SHIFT, IOVA_PFN(end), 1);
	return piova;
}

static struct iova *
__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
		   size_t size, u64 dma_mask)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct iova *iova = NULL;

	if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
		iova = iommu_alloc_iova(domain, size, dma_mask);
	else {
		/*
		 * First try to allocate an io virtual address in
		 * DMA_32BIT_MASK and if that fails then try allocating
		 * from higher range
		 */
		iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
		if (!iova)
			iova = iommu_alloc_iova(domain, size, dma_mask);
	}

	if (!iova) {
		printk(KERN_ERR "Allocating iova for %s failed", pci_name(pdev));
		return NULL;
	}

	return iova;
}
static struct dmar_domain *
get_valid_domain_for_dev(struct pci_dev *pdev)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(pdev,
			DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain) {
		printk(KERN_ERR
			"Allocating domain for %s failed", pci_name(pdev));
		return NULL;
	}

	/* make sure context mapping is ok */
	if (unlikely(!domain_context_mapped(domain, pdev))) {
		ret = domain_context_mapping(domain, pdev);
		if (ret) {
			printk(KERN_ERR
				"Domain context map for %s failed",
				pci_name(pdev));
			return NULL;
		}
	}

	return domain;
}
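
/*
 * Map a single physically contiguous buffer: allocate an IOVA range,
 * fill the page tables for it, and flush the IOTLB (or write buffer)
 * before handing the bus address back to the driver.
 */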
static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
				     size_t size, int dir, u64 dma_mask)
{
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	phys_addr_t start_paddr;
	struct iova *iova;
	int prot = 0;
	int ret;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return paddr;

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	size = aligned_size((u64)paddr, size);

	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
	if (!iova)
		goto error;

	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	/*
	 * paddr - (paddr + size) might be partial page, we should map the whole
	 * page.  Note: if two part of one page are separately mapped, we
	 * might have two guest_addr mapping to the same host paddr, but this
	 * is not a big problem
	 */
	ret = domain_page_mapping(domain, start_paddr,
		((u64)paddr) & PAGE_MASK, size, prot);
	if (ret)
		goto error;

	/* it's a non-present to present mapping */
	ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
			start_paddr, size >> VTD_PAGE_SHIFT, 1);
	if (ret)
		iommu_flush_write_buffer(domain->iommu);

	return start_paddr + ((u64)paddr & (~PAGE_MASK));

error:
	if (iova)
		__free_iova(&domain->iovad, iova);
	printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
		pci_name(pdev), size, (unsigned long long)paddr, dir);
	return 0;
}

dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
			    size_t size, int dir)
{
	return __intel_map_single(hwdev, paddr, size, dir,
				  to_pci_dev(hwdev)->dma_mask);
}
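
/*
 * Deferred unmap path: freed IOVAs are queued per-IOMMU and released in
 * batches after a global IOTLB flush, instead of flushing on every unmap.
 */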
static void flush_unmaps(void)
{
	int i, j;

	timer_on = 0;

	/* just flush them all */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (deferred_flush[i].next) {
			struct intel_iommu *iommu =
				deferred_flush[i].domain[0]->iommu;

			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
						 DMA_TLB_GLOBAL_FLUSH, 0);
			for (j = 0; j < deferred_flush[i].next; j++) {
				__free_iova(&deferred_flush[i].domain[j]->iovad,
						deferred_flush[i].iova[j]);
			}
			deferred_flush[i].next = 0;
		}
	}

	list_size = 0;
}

static void flush_unmaps_timeout(unsigned long data)
{
	unsigned long flags;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	flush_unmaps();
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
	unsigned long flags;
	int next, iommu_id;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	if (list_size == HIGH_WATER_MARK)
		flush_unmaps();

	iommu_id = dom->iommu->seq_id;

	next = deferred_flush[iommu_id].next;
	deferred_flush[iommu_id].domain[next] = dom;
	deferred_flush[iommu_id].iova[next] = iova;
	deferred_flush[iommu_id].next++;

	if (!timer_on) {
		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
		timer_on = 1;
	}
	list_size++;
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
			int dir)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;
	domain = find_domain(pdev);
	BUG_ON(!domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (!iova)
		return;

	start_addr = iova->pfn_lo << PAGE_SHIFT;
	size = aligned_size((u64)dev_addr, size);

	pr_debug("Device %s unmapping: %lx@%llx\n",
		pci_name(pdev), size, (unsigned long long)start_addr);

	/*  clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
	if (intel_iommu_strict) {
		if (iommu_flush_iotlb_psi(domain->iommu,
			domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
			iommu_flush_write_buffer(domain->iommu);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		add_unmap(domain, iova);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
void *intel_alloc_coherent(struct device *hwdev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);
	flags &= ~(GFP_DMA | GFP_DMA32);

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
					 DMA_BIDIRECTIONAL,
					 hwdev->coherent_dma_mask);
	if (*dma_handle)
		return vaddr;
	free_pages((unsigned long)vaddr, order);
	return NULL;
}

void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
			 dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
	free_pages((unsigned long)vaddr, order);
}
#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))

void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
		    int nelems, int dir)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	size_t size = 0;
	void *addr;
	struct scatterlist *sg;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;

	domain = find_domain(pdev);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (!iova)
		return;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		size += aligned_size((u64)addr, sg->length);
	}

	start_addr = iova->pfn_lo << PAGE_SHIFT;

	/*  clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
			size >> VTD_PAGE_SHIFT, 0))
		iommu_flush_write_buffer(domain->iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}
static int intel_nontranslate_map_sg(struct device *hddev,
	struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
		sg->dma_length = sg->length;
	}
	return nelems;
}
int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
		 int dir)
{
	void *addr;
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	size_t offset = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_addr;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size += aligned_size((u64)addr, sg->length);
	}

	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_addr = iova->pfn_lo << PAGE_SHIFT;
	offset = 0;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size = aligned_size((u64)addr, sg->length);
		ret = domain_page_mapping(domain, start_addr + offset,
			((u64)addr) & PAGE_MASK,
			size, prot);
		if (ret) {
			/*  clear the page */
			dma_pte_clear_range(domain, start_addr,
				  start_addr + offset);
			/* free page tables */
			dma_pte_free_pagetable(domain, start_addr,
				  start_addr + offset);
			/* free iova */
			__free_iova(&domain->iovad, iova);
			return 0;
		}
		sg->dma_address = start_addr + offset +
				((u64)addr & (~PAGE_MASK));
		sg->dma_length = sg->length;
		offset += size;
	}

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
			start_addr, offset >> VTD_PAGE_SHIFT, 1))
		iommu_flush_write_buffer(domain->iommu);
	return nelems;
}
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					 sizeof(struct dmar_domain),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
					 sizeof(struct device_domain_info),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					 sizeof(struct iova),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static int __init iommu_init_mempool(void)
{
	int ret;
	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
				!IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
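
/*
 * Late-boot entry point: parse the DMAR tables, set up every IOMMU and
 * install intel_dma_ops as the platform's DMA mapping operations.
 */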
2406 int __init
intel_iommu_init(void)
2410 if (dmar_table_init())
2413 if (dmar_dev_scope_init())
2417 * Check the need for DMA-remapping initialization now.
2418 * Above initialization will also be used by Interrupt-remapping.
2420 if (no_iommu
|| swiotlb
|| dmar_disabled
)
2423 iommu_init_mempool();
2424 dmar_init_reserved_ranges();
2426 init_no_remapping_devices();
2430 printk(KERN_ERR
"IOMMU: dmar init failed\n");
2431 put_iova_domain(&reserved_iova_list
);
2432 iommu_exit_mempool();
2436 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2438 init_timer(&unmap_timer
);
2440 dma_ops
= &intel_dma_ops
;
void intel_iommu_domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so dont process it */
	if (!domain)
		return;

	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~VTD_PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);

struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;

	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
		return NULL;
	}

	iommu = drhd->iommu;
	if (!iommu) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: iommu == NULL\n");
		return NULL;
	}
	domain = iommu_alloc_domain(iommu);
	if (!domain) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain == NULL\n");
		return NULL;
	}
	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain_init() failed\n");
		intel_iommu_domain_exit(domain);
		return NULL;
	}
	return domain;
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);

int intel_iommu_context_mapping(
	struct dmar_domain *domain, struct pci_dev *pdev)
{
	int rc;
	rc = domain_context_mapping(domain, pdev);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);

int intel_iommu_page_mapping(
	struct dmar_domain *domain, dma_addr_t iova,
	u64 hpa, size_t size, int prot)
{
	int rc;
	rc = domain_page_mapping(domain, iova, hpa, size, prot);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);

void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
	detach_domain_for_dev(domain, bus, devfn);
}
EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);

struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
{
	return find_domain(pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);

int intel_iommu_found(void)
{
	return g_num_of_iommus;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);

u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
{
	struct dma_pte *pte;
	u64 pfn;

	pfn = 0;
	pte = addr_to_dma_pte(domain, iova);

	if (pte)
		pfn = dma_pte_addr(*pte);

	return pfn >> VTD_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);