/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
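
/*
 * Root-entry/context-entry lookup works in two steps: the bus number
 * indexes the root table to find a per-bus context table, and devfn
 * indexes that context table to find the per-device translation
 * (domain id, address width, page table root).  The helpers below
 * just read and write the bit positions documented above.
 */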
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}

static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}

static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root) ? phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}
#define CONTEXT_TT_MULTI_LEVEL 0

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-11: available
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
	return (pte->val & VTD_PAGE_MASK);
}

static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
{
	pte->val |= (addr & VTD_PAGE_MASK);
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}
/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine: more than one device
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
struct dmar_domain {
	int	id;			/* domain id */
	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	spinlock_t	mapping_lock;	/* page table lock */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_count;	/* reference count of iommu */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
};
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* it's NULL for PCIE-to-PCI bridge */
	struct dmar_domain *domain; /* pointer to domain */
};
static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;
static void domain_remove_dev_info(struct dmar_domain *domain);

int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;
static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}
static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}
static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
static inline int width_to_agaw(int width);

/* calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}
/* in native case, each domain is related to only one iommu */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);

	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}
/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	domain->iommu_coherency = 1;

	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	for (; i < g_num_of_iommus; ) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
	}
}
static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}
static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}
/* Gets context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}
static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn],
			sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
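
/*
 * Page-table geometry implied by the helpers above: each level decodes
 * LEVEL_STRIDE (9) bits of the address on top of the 12-bit page
 * offset, so width = 30 + agaw * 9 and levels = agaw + 2.  For example
 * (derived from agaw_to_width()/agaw_to_level()):
 *
 *   agaw 1 -> 3-level table, 39-bit address width
 *   agaw 2 -> 4-level table, 48-bit width (DEFAULT_DOMAIN_ADDRESS_WIDTH)
 *   agaw 3 -> 5-level table, 57-bit width
 */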
static struct dma_pte *addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;
	unsigned long flags;

	BUG_ON(!domain->pgd);

	addr &= (((u64)1) << addr_width) - 1;
	parent = domain->pgd;

	spin_lock_irqsave(&domain->mapping_lock, flags);
	while (level > 0) {
		void *tmp_page;

		offset = address_level_offset(addr, level);
		pte = &parent[offset];
		if (level == 1)
			break;

		if (!dma_pte_present(pte)) {
			tmp_page = alloc_pgtable_page();

			if (!tmp_page) {
				spin_unlock_irqrestore(&domain->mapping_lock,
					flags);
				return NULL;
			}
			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
			/*
			 * high level table always sets r/w, last level page
			 * table control read/write
			 */
			dma_set_pte_readable(pte);
			dma_set_pte_writable(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	spin_unlock_irqrestore(&domain->mapping_lock, flags);

	return pte;
}
/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
		int level)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = address_level_offset(addr, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte))
			break;
		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}
/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
	struct dma_pte *pte = NULL;

	/* get last level pte */
	pte = dma_addr_level_pte(domain, addr, 1);

	if (pte) {
		dma_clear_pte(pte);
		domain_flush_cache(domain, pte, sizeof(*pte));
	}
}
/* clear last level pte; a TLB flush should follow */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;
	/* in case it's partial page */
	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;

	/* we don't need lock here, nobody else touches the iova range */
	while (start < end) {
		dma_pte_clear_one(domain, start);
		start += VTD_PAGE_SIZE;
	}
}
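
/*
 * dma_pte_free_pagetable() below walks bottom-up, level 2 first, so a
 * page-directory page is only freed after the last-level tables it
 * points to have been released, and the top-level pgd is only freed
 * when the cleared range spans the whole address space.
 */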
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
	u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	u64 tmp;

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;

	/* we don't need lock here, nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start, level);
		if (tmp >= end || (tmp + level_size(level) > end))
			return;

		while (tmp < end) {
			pte = dma_addr_level_pte(domain, tmp, level);
			if (pte) {
				free_pgtable_page(
					phys_to_virt(dma_pte_addr(pte)));
				dma_clear_pte(pte);
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
			tmp += level_size(level);
		}
		level++;
	}
	/* free pgd */
	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page();
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 cmd, sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	cmd = iommu->gcmd | DMA_GCMD_SRTP;
	writel(cmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!cap_rwbf(iommu->cap))
		return;
	val = iommu->gcmd | DMA_GCMD_WBF;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
			readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determines whether we need a write buffer flush */
static int __iommu_flush_context(struct intel_iommu *iommu,
	u16 did, u16 source_id, u8 function_mask, u64 type,
	int non_present_entry_flush)
{
	u64 val = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing; if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* flush context entry will implicitly flush write buffer */
	return 0;
}
/* return value determines whether we need a write buffer flush */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing; if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
	/* flush iotlb entry will implicitly flush write buffer */
	return 0;
}
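
/*
 * For page-selective invalidation (PSI) the hardware takes an address
 * mask: the mask value encodes an invalidation of 2^mask pages, so the
 * range must be power-of-two sized and naturally aligned.  E.g. a
 * 5-page request becomes mask = ilog2(__roundup_pow_of_two(5)) = 3,
 * i.e. 8 pages, which is what the helper below computes.
 */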
static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int pages, int non_present_entry_flush)
{
	unsigned int mask;

	BUG_ON(addr & (~VTD_PAGE_MASK));
	BUG_ON(pages == 0);

	/* Fallback to domain selective flush if no PSI support */
	if (!cap_pgsel_inv(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH,
						non_present_entry_flush);

	/*
	 * PSI requires page size to be 2 ^ x, and the base address is
	 * naturally aligned to the size
	 */
	mask = ilog2(__roundup_pow_of_two(pages));
	/* Fallback to domain selective flush if size is too big */
	if (mask > cap_max_amask_val(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
			DMA_TLB_DSI_FLUSH, non_present_entry_flush);

	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
					DMA_TLB_PSI_FLUSH,
					non_present_entry_flush);
}
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_TES), sts);

	iommu->gcmd |= DMA_GCMD_TE;
	spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}
static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}
/* iommu interrupt handling. Most of this is MSI-like. */

static const char *fault_reason_strings[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
};
#define MAX_FAULT_REASON_IDX	(ARRAY_SIZE(fault_reason_strings) - 1)

const char *dmar_get_fault_reason(u8 fault_reason)
{
	if (fault_reason > MAX_FAULT_REASON_IDX)
		return "Unknown";
	else
		return fault_reason_strings[fault_reason];
}
void dmar_msi_unmask(unsigned int irq)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	/* unmask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(unsigned int irq)
{
	unsigned long flag;
	struct intel_iommu *iommu = get_irq_data(irq);

	/* mask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
		u8 fault_reason, u16 source_id, unsigned long long addr)
{
	const char *reason;

	reason = dmar_get_fault_reason(fault_reason);

	printk(KERN_ERR
		"DMAR:[%s] Request device [%02x:%02x.%d] "
		"fault addr %llx \n"
		"DMAR:[fault reason %02d] %s\n",
		(type ? "DMA Read" : "DMA Write"),
		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
	return 0;
}
#define PRIMARY_FAULT_REG_LEN (16)
static irqreturn_t iommu_page_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto clear_overflow;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		int type;
		u32 data;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		fault_reason = dma_frcd_fault_reason(data);
		type = dma_frcd_type(data);

		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 8);
		source_id = dma_frcd_source_id(data);

		guest_addr = dmar_readq(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN);
		guest_addr = dma_frcd_page_addr(guest_addr);
		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		spin_unlock_irqrestore(&iommu->register_lock, flag);

		iommu_page_fault_do_one(iommu, type, fault_reason,
				source_id, guest_addr);

		fault_index++;
		if (fault_index > cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		spin_lock_irqsave(&iommu->register_lock, flag);
	}
clear_overflow:
	/* clear primary fault overflow */
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status & DMA_FSTS_PFO)
		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}
int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	irq = create_irq();
	if (!irq) {
		printk(KERN_ERR "IOMMU: no free vectors\n");
		return -EINVAL;
	}

	set_irq_data(irq, iommu);
	iommu->irq = irq;

	ret = arch_setup_dmar_msi(irq);
	if (ret) {
		set_irq_data(irq, NULL);
		iommu->irq = 0;
		destroy_irq(irq);
		return ret;
	}

	/* Force fault register is cleared */
	iommu_page_fault(irq, iommu);

	ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
	if (ret)
		printk(KERN_ERR "IOMMU: can't request irq\n");
	return ret;
}
static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("Number of Domains supported <%ld>\n", ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
			GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		kfree(iommu->domain_ids);
		return -ENOMEM;
	}

	spin_lock_init(&iommu->lock);

	/*
	 * if Caching mode is set, then invalid translations are tagged
	 * with domainid 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}
*domain
);
1219 static void vm_domain_exit(struct dmar_domain
*domain
);
1221 void free_dmar_iommu(struct intel_iommu
*iommu
)
1223 struct dmar_domain
*domain
;
1225 unsigned long flags
;
1227 i
= find_first_bit(iommu
->domain_ids
, cap_ndoms(iommu
->cap
));
1228 for (; i
< cap_ndoms(iommu
->cap
); ) {
1229 domain
= iommu
->domains
[i
];
1230 clear_bit(i
, iommu
->domain_ids
);
1232 spin_lock_irqsave(&domain
->iommu_lock
, flags
);
1233 if (--domain
->iommu_count
== 0) {
1234 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
)
1235 vm_domain_exit(domain
);
1237 domain_exit(domain
);
1239 spin_unlock_irqrestore(&domain
->iommu_lock
, flags
);
1241 i
= find_next_bit(iommu
->domain_ids
,
1242 cap_ndoms(iommu
->cap
), i
+1);
1245 if (iommu
->gcmd
& DMA_GCMD_TE
)
1246 iommu_disable_translation(iommu
);
1249 set_irq_data(iommu
->irq
, NULL
);
1250 /* This will mask the irq */
1251 free_irq(iommu
->irq
, iommu
);
1252 destroy_irq(iommu
->irq
);
1255 kfree(iommu
->domains
);
1256 kfree(iommu
->domain_ids
);
1258 g_iommus
[iommu
->seq_id
] = NULL
;
1260 /* if all iommus are freed, free g_iommus */
1261 for (i
= 0; i
< g_num_of_iommus
; i
++) {
1266 if (i
== g_num_of_iommus
)
1269 /* free context mapping */
1270 free_context_table(iommu
);
static struct dmar_domain *iommu_alloc_domain(struct intel_iommu *iommu)
{
	unsigned long num;
	unsigned long ndomains;
	struct dmar_domain *domain;
	unsigned long flags;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);
	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		free_domain_mem(domain);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return NULL;
	}

	set_bit(num, iommu->domain_ids);
	domain->id = num;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	set_bit(iommu->seq_id, &domain->iommu_bmp);
	domain->flags = 0;
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return domain;
}
static void iommu_free_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct intel_iommu *iommu;

	iommu = domain_get_iommu(domain);

	spin_lock_irqsave(&iommu->lock, flags);
	clear_bit(domain->id, iommu->domain_ids);
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_alloc_key;
static struct lock_class_key reserved_rbtree_key;

static void dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;
	u64 addr, size;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
		&reserved_alloc_key);
	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova)
		printk(KERN_ERR "Reserve IOAPIC range failed\n");

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			addr = r->start;
			addr &= PAGE_MASK;
			size = r->end - addr;
			size = PAGE_ALIGN(size);
			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
				IOVA_PFN(size + addr) - 1);
			if (!iova)
				printk(KERN_ERR "Reserve iova failed\n");
		}
	}
}
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->mapping_lock);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain_get_iommu(domain);
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	domain->iommu_count = 1;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}
static void domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	domain_remove_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);
	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}
static int domain_context_mapping_one(struct dmar_domain *domain,
		u8 bus, u8 devfn)
{
	struct context_entry *context;
	unsigned long flags;
	struct intel_iommu *iommu;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	BUG_ON(!domain->pgd);

	iommu = device_to_iommu(bus, devfn);
	if (!iommu)
		return -ENODEV;

	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	context_set_domain_id(context, domain->id);
	context_set_address_width(context, domain->agaw);
	context_set_address_root(context, virt_to_phys(domain->pgd));
	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/* it's a non-present to present mapping */
	if (iommu->flush.flush_context(iommu, domain->id,
		(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
		DMA_CCMD_DEVICE_INVL, 1))
		iommu_flush_write_buffer(iommu);
	else
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	spin_lock_irqsave(&domain->iommu_lock, flags);
	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
		domain->iommu_count++;
		domain_update_iommu_coherency(domain);
	}
	spin_unlock_irqrestore(&domain->iommu_lock, flags);
	return 0;
}
static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pdev->bus->number,
		pdev->devfn);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain, parent->bus->number,
			parent->devfn);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->subordinate->number, 0);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->bus->number, tmp->devfn);
}
static int domain_context_mapped(struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
	if (!iommu)
		return -ENODEV;

	ret = device_context_mapped(iommu,
		pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(iommu, parent->bus->number,
			parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie)
		return device_context_mapped(iommu,
			tmp->subordinate->number, 0);
	else
		return device_context_mapped(iommu,
			tmp->bus->number, tmp->devfn);
}
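
/*
 * domain_page_mapping() below installs the actual translations: it maps
 * the physical range [hpa, hpa + size) at the given IOVA one VTD page
 * at a time, allocating intermediate page-table levels on demand via
 * addr_to_dma_pte() and flushing each updated PTE for non-coherent
 * hardware.
 */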
static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
			u64 hpa, size_t size, int prot)
{
	u64 start_pfn, end_pfn;
	struct dma_pte *pte;
	int index;
	int addr_width = agaw_to_width(domain->agaw);

	hpa &= (((u64)1) << addr_width) - 1;

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;
	iova &= PAGE_MASK;
	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
	index = 0;
	while (start_pfn < end_pfn) {
		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
		if (!pte)
			return -ENOMEM;
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		BUG_ON(dma_pte_addr(pte));
		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
		dma_set_pte_prot(pte, prot);
		domain_flush_cache(domain, pte, sizeof(*pte));
		start_pfn++;
		index++;
	}
	return 0;
}
static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	if (!iommu)
		return;

	clear_context_table(iommu, bus, devfn);
	iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL, 0);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH, 0);
}
static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;
		spin_unlock_irqrestore(&device_domain_lock, flags);

		iommu = device_to_iommu(info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
/*
 * find_domain
 * Note: struct pci_dev->dev.archdata.iommu stores the domain info
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}
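
/*
 * Devices behind a PCIE-to-PCI bridge cannot be distinguished by
 * source-id, so they must all share one domain: get_domain_for_dev()
 * first looks for an existing domain registered for the upstream
 * bridge's secondary bus before allocating a fresh one.
 */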
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		if (dev_tmp->is_pcie) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	/* Allocate new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
			pci_name(pdev));
		return NULL;
	}
	iommu = drhd->iommu;

	domain = iommu_alloc_domain(iommu);
	if (!domain)
		goto error;

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, uses it */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}
static int iommu_prepare_identity_map(struct pci_dev *pdev,
				      unsigned long long start,
				      unsigned long long end)
{
	struct dmar_domain *domain;
	unsigned long size;
	unsigned long long base;
	int ret;

	printk(KERN_INFO
		"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
		pci_name(pdev), start, end);
	/* page table init */
	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* The address might not be aligned */
	base = start & PAGE_MASK;
	size = end - base;
	size = PAGE_ALIGN(size);
	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
			IOVA_PFN(base + size) - 1)) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		ret = -ENOMEM;
		goto error;
	}

	pr_debug("Mapping reserved region %lx@%llx for %s\n",
		size, base, pci_name(pdev));
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, base, base + size);

	ret = domain_page_mapping(domain, base, base, size,
		DMA_PTE_READ|DMA_PTE_WRITE);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, pdev);
	if (!ret)
		return 0;
error:
	domain_exit(domain);
	return ret;
}

static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
	struct pci_dev *pdev)
{
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(pdev, rmrr->base_address,
		rmrr->end_address + 1);
}
#ifdef CONFIG_DMAR_GFX_WA
struct iommu_prepare_data {
	struct pci_dev *pdev;
	int ret;
};

static int __init iommu_prepare_work_fn(unsigned long start_pfn,
					unsigned long end_pfn, void *datax)
{
	struct iommu_prepare_data *data;

	data = (struct iommu_prepare_data *)datax;

	data->ret = iommu_prepare_identity_map(data->pdev,
				start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	return data->ret;
}

static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
{
	int nid;
	struct iommu_prepare_data data;

	data.pdev = pdev;
	data.ret = 0;

	for_each_online_node(nid) {
		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
		if (data.ret)
			return data.ret;
	}
	return data.ret;
}

static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		ret = iommu_prepare_with_active_regions(pdev);
		if (ret)
			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
	}
}
#else /* !CONFIG_DMAR_GFX_WA */
static inline void iommu_prepare_gfx_mapping(void)
{
	return;
}
#endif

#ifdef CONFIG_DMAR_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	if (ret)
		printk("IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");
}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct pci_dev *pdev;
	struct intel_iommu *iommu;
	int i, ret, unit = 0;

	/*
	 * for each drhd
	 *    allocate root
	 *    initialize and program root entry to not present
	 * endfor
	 */
	for_each_drhd_unit(drhd) {
		g_num_of_iommus++;
		/*
		 * lock not needed as this is only incremented in the single
		 * threaded kernel __init code path; all other access is read
		 * only
		 */
	}

	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
			GFP_KERNEL);
	if (!g_iommus) {
		printk(KERN_ERR "Allocating global iommu array failed\n");
		ret = -ENOMEM;
		goto error;
	}

	deferred_flush = kzalloc(g_num_of_iommus *
		sizeof(struct deferred_flush_tables), GFP_KERNEL);
	if (!deferred_flush) {
		kfree(g_iommus);
		ret = -ENOMEM;
		goto error;
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;
		g_iommus[iommu->seq_id] = iommu;

		ret = iommu_init_domains(iommu);
		if (ret)
			goto error;

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * among all IOMMUs. Need to split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto error;
		}
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;
		if (dmar_enable_qi(iommu)) {
			/*
			 * Queued Invalidate not enabled, use Register Based
			 * Invalidate
			 */
			iommu->flush.flush_context = __iommu_flush_context;
			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
			printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
			       "invalidation\n",
			       (unsigned long long)drhd->reg_base_addr);
		} else {
			iommu->flush.flush_context = qi_flush_context;
			iommu->flush.flush_iotlb = qi_flush_iotlb;
			printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
			       "invalidation\n",
			       (unsigned long long)drhd->reg_base_addr);
		}
	}

	/*
	 * For each rmrr
	 *   for each dev attached to rmrr
	 *   do
	 *     locate drhd for dev, alloc domain for dev
	 *     allocate free domain
	 *     allocate page table entries for rmrr
	 *     if context not allocated for bus
	 *           allocate and init context
	 *           set present in root table for this bus
	 *     init context with domain, translation etc
	 *    endfor
	 * endfor
	 */
	for_each_rmrr_units(rmrr) {
		for (i = 0; i < rmrr->devices_cnt; i++) {
			pdev = rmrr->devices[i];
			/* some BIOS lists non-exist devices in DMAR table */
			if (!pdev)
				continue;
			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
			if (ret)
				printk(KERN_ERR
				       "IOMMU: mapping reserved region failed\n");
		}
	}

	iommu_prepare_gfx_mapping();

	iommu_prepare_isa();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		sprintf(iommu->name, "dmar%d", unit++);

		iommu_flush_write_buffer(iommu);

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto error;

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
					   0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
					 0);
		iommu_disable_protect_mem_regions(iommu);

		ret = iommu_enable_translation(iommu);
		if (ret)
			goto error;
	}

	return 0;
error:
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		free_iommu(iommu);
	}
	kfree(g_iommus);
	return ret;
}
static inline u64 aligned_size(u64 host_addr, size_t size)
{
	u64 addr;
	addr = (host_addr & (~PAGE_MASK)) + size;
	return PAGE_ALIGN(addr);
}
static struct iova *
iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
{
	struct iova *piova;

	/* Make sure it's in range */
	end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
	if (!size || (IOVA_START_ADDR + size > end))
		return NULL;

	piova = alloc_iova(&domain->iovad,
			size >> PAGE_SHIFT, IOVA_PFN(end), 1);
	return piova;
}
static struct iova *
__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
		   size_t size, u64 dma_mask)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct iova *iova = NULL;

	if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
		iova = iommu_alloc_iova(domain, size, dma_mask);
	else {
		/*
		 * First try to allocate an io virtual address in
		 * DMA_32BIT_MASK and if that fails then try allocating
		 * from higher range
		 */
		iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
		if (!iova)
			iova = iommu_alloc_iova(domain, size, dma_mask);
	}

	if (!iova) {
		printk(KERN_ERR "Allocating iova for %s failed", pci_name(pdev));
		return NULL;
	}

	return iova;
}
static struct dmar_domain *
get_valid_domain_for_dev(struct pci_dev *pdev)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(pdev,
			DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain) {
		printk(KERN_ERR
			"Allocating domain for %s failed", pci_name(pdev));
		return NULL;
	}

	/* make sure context mapping is ok */
	if (unlikely(!domain_context_mapped(pdev))) {
		ret = domain_context_mapping(domain, pdev);
		if (ret) {
			printk(KERN_ERR
				"Domain context map for %s failed",
				pci_name(pdev));
			return NULL;
		}
	}

	return domain;
}
__intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2160 size_t size
, int dir
, u64 dma_mask
)
2162 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2163 struct dmar_domain
*domain
;
2164 phys_addr_t start_paddr
;
2168 struct intel_iommu
*iommu
;
2170 BUG_ON(dir
== DMA_NONE
);
2171 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2174 domain
= get_valid_domain_for_dev(pdev
);
2178 iommu
= domain_get_iommu(domain
);
2179 size
= aligned_size((u64
)paddr
, size
);
2181 iova
= __intel_alloc_iova(hwdev
, domain
, size
, pdev
->dma_mask
);
2185 start_paddr
= (phys_addr_t
)iova
->pfn_lo
<< PAGE_SHIFT
;
2188 * Check if DMAR supports zero-length reads on write only
2191 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2192 !cap_zlr(iommu
->cap
))
2193 prot
|= DMA_PTE_READ
;
2194 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2195 prot
|= DMA_PTE_WRITE
;
2197 * paddr - (paddr + size) might be partial page, we should map the whole
2198 * page. Note: if two part of one page are separately mapped, we
2199 * might have two guest_addr mapping to the same host paddr, but this
2200 * is not a big problem
2202 ret
= domain_page_mapping(domain
, start_paddr
,
2203 ((u64
)paddr
) & PAGE_MASK
, size
, prot
);
2207 /* it's a non-present to present mapping */
2208 ret
= iommu_flush_iotlb_psi(iommu
, domain
->id
,
2209 start_paddr
, size
>> VTD_PAGE_SHIFT
, 1);
2211 iommu_flush_write_buffer(iommu
);
2213 return start_paddr
+ ((u64
)paddr
& (~PAGE_MASK
));
2217 __free_iova(&domain
->iovad
, iova
);
2218 printk(KERN_ERR
"Device %s request: %lx@%llx dir %d --- failed\n",
2219 pci_name(pdev
), size
, (unsigned long long)paddr
, dir
);
2223 dma_addr_t
intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2224 size_t size
, int dir
)
2226 return __intel_map_single(hwdev
, paddr
, size
, dir
,
2227 to_pci_dev(hwdev
)->dma_mask
);
static void flush_unmaps(void)
{
	int i, j;

	timer_on = 0;

	/* just flush them all */
	for (i = 0; i < g_num_of_iommus; i++) {
		struct intel_iommu *iommu = g_iommus[i];
		if (!iommu)
			continue;

		if (deferred_flush[i].next) {
			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
						 DMA_TLB_GLOBAL_FLUSH, 0);
			for (j = 0; j < deferred_flush[i].next; j++) {
				__free_iova(&deferred_flush[i].domain[j]->iovad,
						deferred_flush[i].iova[j]);
			}
			deferred_flush[i].next = 0;
		}
	}

	list_size = 0;
}

static void flush_unmaps_timeout(unsigned long data)
{
	unsigned long flags;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	flush_unmaps();
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
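
/*
 * Deferred unmap batching: instead of one IOTLB flush per unmap, freed
 * IOVAs are parked per-iommu in deferred_flush[] and released in bulk
 * by flush_unmaps(), triggered either by the 10ms unmap_timer or by
 * hitting HIGH_WATER_MARK pending entries.
 */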
static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
	unsigned long flags;
	int next, iommu_id;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	if (list_size == HIGH_WATER_MARK)
		flush_unmaps();

	iommu = domain_get_iommu(dom);
	iommu_id = iommu->seq_id;

	next = deferred_flush[iommu_id].next;
	deferred_flush[iommu_id].domain[next] = dom;
	deferred_flush[iommu_id].iova[next] = iova;
	deferred_flush[iommu_id].next++;

	if (!timer_on) {
		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
		timer_on = 1;
	}
	list_size++;
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
			int dir)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	struct intel_iommu *iommu;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;
	domain = find_domain(pdev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (!iova)
		return;

	start_addr = iova->pfn_lo << PAGE_SHIFT;
	size = aligned_size((u64)dev_addr, size);

	pr_debug("Device %s unmapping: %lx@%llx\n",
		pci_name(pdev), size, (unsigned long long)start_addr);

	/*  clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
	if (intel_iommu_strict) {
		if (iommu_flush_iotlb_psi(iommu,
			domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
			iommu_flush_write_buffer(iommu);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		add_unmap(domain, iova);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
void *intel_alloc_coherent(struct device *hwdev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);
	flags &= ~(GFP_DMA | GFP_DMA32);

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
					 DMA_BIDIRECTIONAL,
					 hwdev->coherent_dma_mask);
	if (*dma_handle)
		return vaddr;
	free_pages((unsigned long)vaddr, order);
	return NULL;
}

void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
			 dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
	free_pages((unsigned long)vaddr, order);
}
#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))

void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
		    int nelems, int dir)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	size_t size = 0;
	void *addr;
	struct scatterlist *sg;
	struct intel_iommu *iommu;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;

	domain = find_domain(pdev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (!iova)
		return;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		size += aligned_size((u64)addr, sg->length);
	}

	start_addr = iova->pfn_lo << PAGE_SHIFT;

	/*  clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
			size >> VTD_PAGE_SHIFT, 0))
		iommu_flush_write_buffer(iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}
static int intel_nontranslate_map_sg(struct device *hddev,
	struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
		sg->dma_length = sg->length;
	}
	return nelems;
}
int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
		 int dir)
{
	void *addr;
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	size_t offset = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_addr;
	struct intel_iommu *iommu;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);

	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size += aligned_size((u64)addr, sg->length);
	}

	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_addr = iova->pfn_lo << PAGE_SHIFT;
	offset = 0;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size = aligned_size((u64)addr, sg->length);
		ret = domain_page_mapping(domain, start_addr + offset,
			((u64)addr) & PAGE_MASK, size, prot);
		if (ret) {
			/* clear the page */
			dma_pte_clear_range(domain, start_addr,
					    start_addr + offset);
			/* free page tables */
			dma_pte_free_pagetable(domain, start_addr,
					       start_addr + offset);
			/* free iova */
			__free_iova(&domain->iovad, iova);
			return 0;
		}
		sg->dma_address = start_addr + offset +
				((u64)addr & (~PAGE_MASK));
		sg->dma_length = sg->length;
		offset += size;
	}

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_psi(iommu, domain->id,
			start_addr, offset >> VTD_PAGE_SHIFT, 1))
		iommu_flush_write_buffer(iommu);
	return nelems;
}
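/*
 * A minimal scatter/gather sketch, assuming a hypothetical driver with
 * an already-initialized sglist of nents entries; the generic DMA API
 * dispatches to intel_map_sg()/intel_unmap_sg() above:
 *
 *	int mapped;
 *
 *	mapped = dma_map_sg(&pdev->dev, sglist, nents, DMA_FROM_DEVICE);
 *	if (mapped) {
 *		...program the device with sg_dma_address()/sg_dma_len()...
 *		dma_unmap_sg(&pdev->dev, sglist, nents, DMA_FROM_DEVICE);
 *	}
 */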
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};
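/*
 * These operations become the system-wide dma_ops once intel_iommu_init()
 * below succeeds, so every dma_map_*()/dma_unmap_*() call on the machine
 * is funneled through the remapping paths above.
 */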
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					 sizeof(struct dmar_domain),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
					 sizeof(struct device_domain_info),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					 sizeof(struct iova),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}
static int __init iommu_init_mempool(void)
{
	int ret;

	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}
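/*
 * A minimal sketch of how these slab caches are consumed by the
 * alloc_*_mem() helpers earlier in this file; allocations use GFP_ATOMIC
 * since callers may hold spinlocks:
 *
 *	struct dmar_domain *d;
 *
 *	d = kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
 *	if (d)
 *		kmem_cache_free(iommu_domain_cache, d);
 */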
static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
			    !IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
int __init intel_iommu_init(void)
{
	int ret = 0;

	if (dmar_table_init())
		return -ENODEV;

	if (dmar_dev_scope_init())
		return -ENODEV;

	/*
	 * Check the need for DMA-remapping initialization now.
	 * Above initialization will also be used by Interrupt-remapping.
	 */
	if (no_iommu || swiotlb || dmar_disabled)
		return -ENODEV;

	iommu_init_mempool();
	dmar_init_reserved_ranges();

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
	dma_ops = &intel_dma_ops;
	return 0;
}
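/*
 * Note: the behaviour of the setup above can be steered from the kernel
 * command line (parsed earlier in this file): "intel_iommu=off" sets
 * dmar_disabled so the early bail-out above triggers, while
 * "intel_iommu=strict" selects the synchronous IOTLB-flush policy used
 * by intel_unmap_single().
 */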
static int vm_domain_add_dev_info(struct dmar_domain *domain,
				  struct pci_dev *pdev)
{
	struct device_domain_info *info;
	unsigned long flags;

	info = alloc_devinfo_mem();
	if (!info)
		return -ENOMEM;

	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
					  struct pci_dev *pdev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	int found = 0;
	struct list_head *entry, *tmp;

	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_safe(entry, tmp, &domain->devices) {
		info = list_entry(entry, struct device_domain_info, link);
		if (info->bus == pdev->bus->number &&
		    info->devfn == pdev->devfn) {
			list_del(&info->link);
			list_del(&info->global);
			if (info->dev)
				info->dev->dev.archdata.iommu = NULL;
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_detach_dev(iommu, info->bus, info->devfn);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there is no other devices under the same iommu
		 * owned by this domain, clear this iommu in iommu_bmp
		 * update iommu count and coherency
		 */
		if (device_to_iommu(info->bus, info->devfn) == iommu)
			found = 1;
	}

	if (found == 0) {
		unsigned long tmp_flags;
		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
		clear_bit(iommu->seq_id, &domain->iommu_bmp);
		domain->iommu_count--;
		domain_update_iommu_coherency(domain);
		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);
}
static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags1, flags2;

	spin_lock_irqsave(&device_domain_lock, flags1);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;

		spin_unlock_irqrestore(&device_domain_lock, flags1);

		iommu = device_to_iommu(info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);

		/* clear this iommu in iommu_bmp, update iommu count
		 * and coherency
		 */
		spin_lock_irqsave(&domain->iommu_lock, flags2);
		if (test_and_clear_bit(iommu->seq_id,
				       &domain->iommu_bmp)) {
			domain->iommu_count--;
			domain_update_iommu_coherency(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags2);

		free_devinfo_mem(info);
		spin_lock_irqsave(&device_domain_lock, flags1);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags1);
}
/* domain id for virtual machine, it won't be set in context */
static unsigned long vm_domid;

static struct dmar_domain *iommu_alloc_vm_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->id = vm_domid++;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;

	return domain;
}
static int vm_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->mapping_lock);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	INIT_LIST_HEAD(&domain->devices);

	domain->iommu_count = 0;
	domain->iommu_coherency = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}
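/*
 * Worked example for the AGAW calculation above, assuming a 48-bit
 * guest_width: guestwidth_to_adjustwidth() rounds the width to one the
 * hardware can express, and width_to_agaw() maps it onto the VT-d
 * encoding ((width - 30) / 9), so 48 bits yields AGAW 2, i.e. a
 * 4-level page table.
 */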
static void iommu_free_vm_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	unsigned long i;
	unsigned long ndomains;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;

		ndomains = cap_ndoms(iommu->cap);
		i = find_first_bit(iommu->domain_ids, ndomains);
		for (; i < ndomains; ) {
			if (iommu->domains[i] == domain) {
				spin_lock_irqsave(&iommu->lock, flags);
				clear_bit(i, iommu->domain_ids);
				iommu->domains[i] = NULL;
				spin_unlock_irqrestore(&iommu->lock, flags);
				break;
			}
			i = find_next_bit(iommu->domain_ids, ndomains, i+1);
		}
	}
}
static void vm_domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	vm_domain_remove_all_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);
	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~VTD_PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_vm_domain(domain);
	free_domain_mem(domain);
}
void intel_iommu_domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~VTD_PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;

	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
		return NULL;
	}

	iommu = drhd->iommu;
	if (!iommu) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: iommu == NULL\n");
		return NULL;
	}
	domain = iommu_alloc_domain(iommu);
	if (!domain) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain == NULL\n");
		return NULL;
	}
	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain_init() failed\n");
		intel_iommu_domain_exit(domain);
		return NULL;
	}

	return domain;
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
int intel_iommu_context_mapping(
	struct dmar_domain *domain, struct pci_dev *pdev)
{
	int rc;
	rc = domain_context_mapping(domain, pdev);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);

int intel_iommu_page_mapping(
	struct dmar_domain *domain, dma_addr_t iova,
	u64 hpa, size_t size, int prot)
{
	int rc;
	rc = domain_page_mapping(domain, iova, hpa, size, prot);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
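/*
 * A minimal sketch of how an external user (e.g. device-assignment code)
 * might drive the exported interface; dom, pdev, gpa and the page pg are
 * hypothetical and error handling is elided:
 *
 *	struct dmar_domain *dom = intel_iommu_domain_alloc(pdev);
 *
 *	intel_iommu_context_mapping(dom, pdev);
 *	intel_iommu_page_mapping(dom, gpa, page_to_phys(pg),
 *				 VTD_PAGE_SIZE,
 *				 DMA_PTE_READ | DMA_PTE_WRITE);
 *	...
 *	intel_iommu_domain_exit(dom);
 */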
void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
	struct intel_iommu *iommu;

	iommu = device_to_iommu(bus, devfn);
	iommu_detach_dev(iommu, bus, devfn);
}
EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);

struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
{
	return find_domain(pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);

int intel_iommu_found(void)
{
	return g_num_of_iommus;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);

u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
{
	struct dma_pte *pte;
	u64 pfn;

	pfn = 0;
	pte = addr_to_dma_pte(domain, iova);

	if (pte)
		pfn = dma_pte_addr(pte);

	return pfn >> VTD_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
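/*
 * Continuing the hypothetical sketch above: once a guest-physical
 * address gpa has been mapped into dom, the host pfn backing it can be
 * recovered with:
 *
 *	u64 pfn = intel_iommu_iova_to_pfn(dom, gpa);
 */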