/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START (0xfee00000)
#define IOAPIC_RANGE_END (0xfeefffff)
#define IOVA_START_ADDR (0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)	((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN	IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN	IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE (9)
#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES (~0xFFFUL)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}
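
/*
 * Worked example, following directly from the helpers above: the default
 * 48-bit guest address width gives agaw = width_to_agaw(48) = (48 - 30) / 9
 * = 2, which agaw_to_level() turns into a 4-level page table.  Level 1
 * indexes the low 9 bits of the pfn (address bits 12-20), level 2 the next
 * 9 bits (21-29), and so on, LEVEL_STRIDE bits per level.
 */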

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << ((lvl - 1) * LEVEL_STRIDE);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

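/*
 * Example: with 4KiB MM pages, PAGE_SHIFT == VTD_PAGE_SHIFT == 12 and the
 * two conversions above are the identity.  On a kernel built with 64KiB
 * pages (PAGE_SHIFT == 16) one mm pfn covers 16 consecutive VT-d (dma)
 * pfns, which is why VT-d pages must be the smaller unit.
 */
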
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64 val;
	u64 rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}

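/*
 * Sizes implied above: with VTD_PAGE_SIZE of 4KiB and 16-byte root entries,
 * ROOT_ENTRY_NR is 256, one root entry per PCI bus.  Each root entry points
 * to a context table that is likewise one page, indexed by the 8-bit devfn
 * (see device_to_context_entry() further down).
 */
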
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_snp(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_SNP;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & (1 << 7));
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
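
/*
 * first_pte_in_page() relies on page tables being page aligned: a pte whose
 * address has no bits set below VTD_PAGE_SHIFT is entry 0 of its 512-entry
 * table, which the mapping and clearing loops below use to batch cache
 * flushes one table page at a time.
 */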

/*
 * This domain is a statically identity mapping domain.
 * 1. This domain creates a static 1:1 mapping to all usable memory.
 * 2. It maps to each iommu if successful.
 * 3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine, more than one device
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)

/* define the limit of IOMMUs supported in each domain */
#ifdef CONFIG_X86
# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
#else
# define IOMMU_UNITS_SUPPORTED 64
#endif

struct dmar_domain {
	int id; /* domain id */
	int nid; /* node id */
	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
		/* bitmap of iommus this domain uses*/

	struct list_head devices; /* all devices' list */
	struct iova_domain iovad; /* iova's that belong to this domain */

	struct dma_pte *pgd; /* virtual address */
	int gaw; /* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int agaw;

	int flags; /* flags to find out type of domain */

	int iommu_coherency;/* indicate coherency of iommu access */
	int iommu_snooping; /* indicate snooping control feature*/
	int iommu_count; /* reference count of iommu */
	int iommu_superpage;/* Level of superpages supported:
			       0 == 4KiB (no superpages), 1 == 2MiB,
			       2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t iommu_lock; /* protect iommu set in domain */
	u64 max_addr; /* maximum mapped address */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link; /* link to domain siblings */
	struct list_head global; /* link to global list */
	int segment; /* PCI domain */
	u8 bus; /* PCI bus number */
	u8 devfn; /* PCI devfn number */
	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

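/*
 * Each device attached to a domain gets a device_domain_info that is linked
 * into both its domain's ->devices list and the global device_domain_list
 * below; domain_remove_dev_info() (declared further down) unlinks them again
 * when the domain is torn down.
 */
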
static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

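/*
 * The declarations above back the batched ("lazy") IOTLB flush used on the
 * DMA unmap path: freed IOVAs are parked in deferred_flush[] and released
 * only after an IOTLB flush, driven by unmap_timer and the HIGH_WATER_MARK
 * limit; booting with intel_iommu=strict (parsed below) bypasses the
 * batching entirely.
 */
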
static void domain_remove_dev_info(struct dmar_domain *domain);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;

441int intel_iommu_gfx_mapped;
442EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
443
ba395927
KA
444#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
445static DEFINE_SPINLOCK(device_domain_lock);
446static LIST_HEAD(device_domain_list);
447
a8bcbb0d
JR
448static struct iommu_ops intel_iommu_ops;
449
ba395927
KA
450static int __init intel_iommu_setup(char *str)
451{
452 if (!str)
453 return -EINVAL;
454 while (*str) {
0cd5c3c8
KM
455 if (!strncmp(str, "on", 2)) {
456 dmar_disabled = 0;
457 printk(KERN_INFO "Intel-IOMMU: enabled\n");
458 } else if (!strncmp(str, "off", 3)) {
ba395927 459 dmar_disabled = 1;
0cd5c3c8 460 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
461 } else if (!strncmp(str, "igfx_off", 8)) {
462 dmar_map_gfx = 0;
463 printk(KERN_INFO
464 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 465 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 466 printk(KERN_INFO
7d3b03ce
KA
467 "Intel-IOMMU: Forcing DAC for PCI devices\n");
468 dmar_forcedac = 1;
5e0d2a6f 469 } else if (!strncmp(str, "strict", 6)) {
470 printk(KERN_INFO
471 "Intel-IOMMU: disable batched IOTLB flush\n");
472 intel_iommu_strict = 1;
6dd9a7c7
YS
473 } else if (!strncmp(str, "sp_off", 6)) {
474 printk(KERN_INFO
475 "Intel-IOMMU: disable supported super page\n");
476 intel_iommu_superpage = 0;
ba395927
KA
477 }
478
479 str += strcspn(str, ",");
480 while (*str == ',')
481 str++;
482 }
483 return 0;
484}
485__setup("intel_iommu=", intel_iommu_setup);
486
487static struct kmem_cache *iommu_domain_cache;
488static struct kmem_cache *iommu_devinfo_cache;
489static struct kmem_cache *iommu_iova_cache;
490
4c923d47 491static inline void *alloc_pgtable_page(int node)
eb3fa7cb 492{
4c923d47
SS
493 struct page *page;
494 void *vaddr = NULL;
eb3fa7cb 495
4c923d47
SS
496 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
497 if (page)
498 vaddr = page_address(page);
eb3fa7cb 499 return vaddr;
ba395927
KA
500}
501
502static inline void free_pgtable_page(void *vaddr)
503{
504 free_page((unsigned long)vaddr);
505}
506
507static inline void *alloc_domain_mem(void)
508{
354bb65e 509 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
510}
511
38717946 512static void free_domain_mem(void *vaddr)
ba395927
KA
513{
514 kmem_cache_free(iommu_domain_cache, vaddr);
515}
516
517static inline void * alloc_devinfo_mem(void)
518{
354bb65e 519 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
520}
521
522static inline void free_devinfo_mem(void *vaddr)
523{
524 kmem_cache_free(iommu_devinfo_cache, vaddr);
525}
526
527struct iova *alloc_iova_mem(void)
528{
354bb65e 529 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
530}
531
532void free_iova_mem(struct iova *iova)
533{
534 kmem_cache_free(iommu_iova_cache, iova);
535}
536
1b573683 537
4ed0d3e6 538static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
539{
540 unsigned long sagaw;
541 int agaw = -1;
542
543 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 544 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
545 agaw >= 0; agaw--) {
546 if (test_bit(agaw, &sagaw))
547 break;
548 }
549
550 return agaw;
551}
552
4ed0d3e6
FY
553/*
554 * Calculate max SAGAW for each iommu.
555 */
556int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
557{
558 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
559}
560
561/*
562 * calculate agaw for each iommu.
563 * "SAGAW" may be different across iommus, use a default agaw, and
564 * get a supported less agaw for iommus that don't support the default agaw.
565 */
566int iommu_calculate_agaw(struct intel_iommu *iommu)
567{
568 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
569}
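
/*
 * With DEFAULT_DOMAIN_ADDRESS_WIDTH of 48 this asks for agaw 2 (a 4-level
 * table); __iommu_calculate_agaw() then walks downwards and returns the
 * largest agaw the unit actually advertises in its SAGAW capability field,
 * or -1 if none of them is supported.
 */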
570
571/* This function only returns a single iommu in a domain */
8c11e798
WH
572static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
573{
574 int iommu_id;
575
2c2e2c38 576 /* si_domain and vm domain should not get here. */
1ce28feb 577 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 578 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 579
1b198bb0 580 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
581 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
582 return NULL;
583
584 return g_iommus[iommu_id];
585}
586
8e604097
WH
587static void domain_update_iommu_coherency(struct dmar_domain *domain)
588{
589 int i;
590
591 domain->iommu_coherency = 1;
592
1b198bb0 593 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
8e604097
WH
594 if (!ecap_coherent(g_iommus[i]->ecap)) {
595 domain->iommu_coherency = 0;
596 break;
597 }
8e604097
WH
598 }
599}
600
58c610bd
SY
601static void domain_update_iommu_snooping(struct dmar_domain *domain)
602{
603 int i;
604
605 domain->iommu_snooping = 1;
606
1b198bb0 607 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
608 if (!ecap_sc_support(g_iommus[i]->ecap)) {
609 domain->iommu_snooping = 0;
610 break;
611 }
58c610bd
SY
612 }
613}
614
6dd9a7c7
YS
615static void domain_update_iommu_superpage(struct dmar_domain *domain)
616{
8140a95d
AK
617 struct dmar_drhd_unit *drhd;
618 struct intel_iommu *iommu = NULL;
619 int mask = 0xf;
6dd9a7c7
YS
620
621 if (!intel_iommu_superpage) {
622 domain->iommu_superpage = 0;
623 return;
624 }
625
8140a95d
AK
626 /* set iommu_superpage to the smallest common denominator */
627 for_each_active_iommu(iommu, drhd) {
628 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
629 if (!mask) {
630 break;
631 }
632 }
633 domain->iommu_superpage = fls(mask);
634}
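
/*
 * Example: if every active iommu advertises both 2MiB and 1GiB superpages
 * (cap_super_page_val() bits 0 and 1), mask ends up as 0x3 and fls() sets
 * domain->iommu_superpage to 2, i.e. up to 1GiB pages per the comment in
 * struct dmar_domain; a single unit without superpage support zeroes the
 * mask and disables superpages for the whole domain.
 */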
635
58c610bd
SY
636/* Some capabilities may be different across iommus */
637static void domain_update_iommu_cap(struct dmar_domain *domain)
638{
639 domain_update_iommu_coherency(domain);
640 domain_update_iommu_snooping(domain);
6dd9a7c7 641 domain_update_iommu_superpage(domain);
58c610bd
SY
642}
643
276dbf99 644static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
645{
646 struct dmar_drhd_unit *drhd = NULL;
647 int i;
648
649 for_each_drhd_unit(drhd) {
650 if (drhd->ignored)
651 continue;
276dbf99
DW
652 if (segment != drhd->segment)
653 continue;
c7151a8d 654
924b6231 655 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
656 if (drhd->devices[i] &&
657 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
658 drhd->devices[i]->devfn == devfn)
659 return drhd->iommu;
4958c5dc
DW
660 if (drhd->devices[i] &&
661 drhd->devices[i]->subordinate &&
924b6231
DW
662 drhd->devices[i]->subordinate->number <= bus &&
663 drhd->devices[i]->subordinate->subordinate >= bus)
664 return drhd->iommu;
665 }
c7151a8d
WH
666
667 if (drhd->include_all)
668 return drhd->iommu;
669 }
670
671 return NULL;
672}
673
5331fe6f
WH
674static void domain_flush_cache(struct dmar_domain *domain,
675 void *addr, int size)
676{
677 if (!domain->iommu_coherency)
678 clflush_cache_range(addr, size);
679}
680
ba395927
KA
681/* Gets context entry for a given bus and devfn */
682static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
683 u8 bus, u8 devfn)
684{
685 struct root_entry *root;
686 struct context_entry *context;
687 unsigned long phy_addr;
688 unsigned long flags;
689
690 spin_lock_irqsave(&iommu->lock, flags);
691 root = &iommu->root_entry[bus];
692 context = get_context_addr_from_root(root);
693 if (!context) {
4c923d47
SS
694 context = (struct context_entry *)
695 alloc_pgtable_page(iommu->node);
ba395927
KA
696 if (!context) {
697 spin_unlock_irqrestore(&iommu->lock, flags);
698 return NULL;
699 }
5b6985ce 700 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
701 phy_addr = virt_to_phys((void *)context);
702 set_root_value(root, phy_addr);
703 set_root_present(root);
704 __iommu_flush_cache(iommu, root, sizeof(*root));
705 }
706 spin_unlock_irqrestore(&iommu->lock, flags);
707 return &context[devfn];
708}
709
710static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
711{
712 struct root_entry *root;
713 struct context_entry *context;
714 int ret;
715 unsigned long flags;
716
717 spin_lock_irqsave(&iommu->lock, flags);
718 root = &iommu->root_entry[bus];
719 context = get_context_addr_from_root(root);
720 if (!context) {
721 ret = 0;
722 goto out;
723 }
c07e7d21 724 ret = context_present(&context[devfn]);
ba395927
KA
725out:
726 spin_unlock_irqrestore(&iommu->lock, flags);
727 return ret;
728}
729
730static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
731{
732 struct root_entry *root;
733 struct context_entry *context;
734 unsigned long flags;
735
736 spin_lock_irqsave(&iommu->lock, flags);
737 root = &iommu->root_entry[bus];
738 context = get_context_addr_from_root(root);
739 if (context) {
c07e7d21 740 context_clear_entry(&context[devfn]);
ba395927
KA
741 __iommu_flush_cache(iommu, &context[devfn], \
742 sizeof(*context));
743 }
744 spin_unlock_irqrestore(&iommu->lock, flags);
745}
746
747static void free_context_table(struct intel_iommu *iommu)
748{
749 struct root_entry *root;
750 int i;
751 unsigned long flags;
752 struct context_entry *context;
753
754 spin_lock_irqsave(&iommu->lock, flags);
755 if (!iommu->root_entry) {
756 goto out;
757 }
758 for (i = 0; i < ROOT_ENTRY_NR; i++) {
759 root = &iommu->root_entry[i];
760 context = get_context_addr_from_root(root);
761 if (context)
762 free_pgtable_page(context);
763 }
764 free_pgtable_page(iommu->root_entry);
765 iommu->root_entry = NULL;
766out:
767 spin_unlock_irqrestore(&iommu->lock, flags);
768}
769
b026fd28 770static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
4399c8bf 771 unsigned long pfn, int target_level)
ba395927 772{
b026fd28 773 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
774 struct dma_pte *parent, *pte = NULL;
775 int level = agaw_to_level(domain->agaw);
4399c8bf 776 int offset;
ba395927
KA
777
778 BUG_ON(!domain->pgd);
b026fd28 779 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
780 parent = domain->pgd;
781
ba395927
KA
782 while (level > 0) {
783 void *tmp_page;
784
b026fd28 785 offset = pfn_level_offset(pfn, level);
ba395927 786 pte = &parent[offset];
4399c8bf 787 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7
YS
788 break;
789 if (level == target_level)
ba395927
KA
790 break;
791
19c239ce 792 if (!dma_pte_present(pte)) {
c85994e4
DW
793 uint64_t pteval;
794
4c923d47 795 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 796
206a73c1 797 if (!tmp_page)
ba395927 798 return NULL;
206a73c1 799
c85994e4 800 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 801 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
802 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
803 /* Someone else set it while we were thinking; use theirs. */
804 free_pgtable_page(tmp_page);
805 } else {
806 dma_pte_addr(pte);
807 domain_flush_cache(domain, pte, sizeof(*pte));
808 }
ba395927 809 }
19c239ce 810 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
811 level--;
812 }
813
ba395927
KA
814 return pte;
815}
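
/*
 * The walk above mirrors the hardware lookup: with a 4-level table it
 * consumes pfn bits [35:27], [26:18], [17:9] and [8:0] via
 * pfn_level_offset(), allocating missing intermediate tables with
 * cmpxchg64() so concurrent mappers never overwrite each other's work.
 * A target_level above 1 stops the descent early so the caller can install
 * a superpage PTE at that level.
 */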
816
6dd9a7c7 817
ba395927 818/* return address's pte at specific level */
90dcfb5e
DW
819static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
820 unsigned long pfn,
6dd9a7c7 821 int level, int *large_page)
ba395927
KA
822{
823 struct dma_pte *parent, *pte = NULL;
824 int total = agaw_to_level(domain->agaw);
825 int offset;
826
827 parent = domain->pgd;
828 while (level <= total) {
90dcfb5e 829 offset = pfn_level_offset(pfn, total);
ba395927
KA
830 pte = &parent[offset];
831 if (level == total)
832 return pte;
833
6dd9a7c7
YS
834 if (!dma_pte_present(pte)) {
835 *large_page = total;
ba395927 836 break;
6dd9a7c7
YS
837 }
838
839 if (pte->val & DMA_PTE_LARGE_PAGE) {
840 *large_page = total;
841 return pte;
842 }
843
19c239ce 844 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
845 total--;
846 }
847 return NULL;
848}
849
ba395927 850/* clear last level pte, a tlb flush should be followed */
292827cb 851static int dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
852 unsigned long start_pfn,
853 unsigned long last_pfn)
ba395927 854{
04b18e65 855 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 856 unsigned int large_page = 1;
310a5ab9 857 struct dma_pte *first_pte, *pte;
292827cb 858 int order;
66eae846 859
04b18e65 860 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 861 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 862 BUG_ON(start_pfn > last_pfn);
ba395927 863
04b18e65 864 /* we don't need lock here; nobody else touches the iova range */
59c36286 865 do {
6dd9a7c7
YS
866 large_page = 1;
867 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 868 if (!pte) {
6dd9a7c7 869 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
870 continue;
871 }
6dd9a7c7 872 do {
310a5ab9 873 dma_clear_pte(pte);
6dd9a7c7 874 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 875 pte++;
75e6bf96
DW
876 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
877
310a5ab9
DW
878 domain_flush_cache(domain, first_pte,
879 (void *)pte - (void *)first_pte);
59c36286
DW
880
881 } while (start_pfn && start_pfn <= last_pfn);
292827cb
AK
882
883 order = (large_page - 1) * 9;
884 return order;
ba395927
KA
885}
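
/*
 * The returned order reflects the largest page size met while clearing:
 * 0 when only 4KiB PTEs were touched, 9 when a 2MiB superpage level was
 * cleared, matching the (large_page - 1) * 9 computation above.
 */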
886
887/* free page table pages. last level pte should already be cleared */
888static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
889 unsigned long start_pfn,
890 unsigned long last_pfn)
ba395927 891{
6660c63a 892 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 893 struct dma_pte *first_pte, *pte;
ba395927
KA
894 int total = agaw_to_level(domain->agaw);
895 int level;
6660c63a 896 unsigned long tmp;
6dd9a7c7 897 int large_page = 2;
ba395927 898
6660c63a
DW
899 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
900 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 901 BUG_ON(start_pfn > last_pfn);
ba395927 902
f3a0a52f 903 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
904 level = 2;
905 while (level <= total) {
6660c63a
DW
906 tmp = align_to_level(start_pfn, level);
907
f3a0a52f 908 /* If we can't even clear one PTE at this level, we're done */
6660c63a 909 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
910 return;
911
59c36286 912 do {
6dd9a7c7
YS
913 large_page = level;
914 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
915 if (large_page > level)
916 level = large_page + 1;
f3a0a52f
DW
917 if (!pte) {
918 tmp = align_to_level(tmp + 1, level + 1);
919 continue;
920 }
75e6bf96 921 do {
6a43e574
DW
922 if (dma_pte_present(pte)) {
923 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
924 dma_clear_pte(pte);
925 }
f3a0a52f
DW
926 pte++;
927 tmp += level_size(level);
75e6bf96
DW
928 } while (!first_pte_in_page(pte) &&
929 tmp + level_size(level) - 1 <= last_pfn);
930
f3a0a52f
DW
931 domain_flush_cache(domain, first_pte,
932 (void *)pte - (void *)first_pte);
933
59c36286 934 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
935 level++;
936 }
937 /* free pgd */
d794dc9b 938 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
939 free_pgtable_page(domain->pgd);
940 domain->pgd = NULL;
941 }
942}
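
/*
 * Note that the pgd itself is only released when the caller tears down the
 * whole address space (start_pfn 0 through DOMAIN_MAX_PFN), which is what
 * domain_exit() does further down.
 */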
943
944/* iommu handling */
945static int iommu_alloc_root_entry(struct intel_iommu *iommu)
946{
947 struct root_entry *root;
948 unsigned long flags;
949
4c923d47 950 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
951 if (!root)
952 return -ENOMEM;
953
5b6985ce 954 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
955
956 spin_lock_irqsave(&iommu->lock, flags);
957 iommu->root_entry = root;
958 spin_unlock_irqrestore(&iommu->lock, flags);
959
960 return 0;
961}
962
ba395927
KA
963static void iommu_set_root_entry(struct intel_iommu *iommu)
964{
965 void *addr;
c416daa9 966 u32 sts;
ba395927
KA
967 unsigned long flag;
968
969 addr = iommu->root_entry;
970
1f5b3c3f 971 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
972 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
973
c416daa9 974 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
975
976 /* Make sure hardware complete it */
977 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 978 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 979
1f5b3c3f 980 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
981}
982
983static void iommu_flush_write_buffer(struct intel_iommu *iommu)
984{
985 u32 val;
986 unsigned long flag;
987
9af88143 988 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 989 return;
ba395927 990
1f5b3c3f 991 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 992 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
993
994 /* Make sure hardware complete it */
995 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 996 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 997
1f5b3c3f 998 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
999}
1000
1001/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1002static void __iommu_flush_context(struct intel_iommu *iommu,
1003 u16 did, u16 source_id, u8 function_mask,
1004 u64 type)
ba395927
KA
1005{
1006 u64 val = 0;
1007 unsigned long flag;
1008
ba395927
KA
1009 switch (type) {
1010 case DMA_CCMD_GLOBAL_INVL:
1011 val = DMA_CCMD_GLOBAL_INVL;
1012 break;
1013 case DMA_CCMD_DOMAIN_INVL:
1014 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1015 break;
1016 case DMA_CCMD_DEVICE_INVL:
1017 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1018 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1019 break;
1020 default:
1021 BUG();
1022 }
1023 val |= DMA_CCMD_ICC;
1024
1f5b3c3f 1025 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1026 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1027
1028 /* Make sure hardware complete it */
1029 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1030 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1031
1f5b3c3f 1032 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1033}
1034
1035/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1036static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1037 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1038{
1039 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1040 u64 val = 0, val_iva = 0;
1041 unsigned long flag;
1042
ba395927
KA
1043 switch (type) {
1044 case DMA_TLB_GLOBAL_FLUSH:
1045 /* global flush doesn't need set IVA_REG */
1046 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1047 break;
1048 case DMA_TLB_DSI_FLUSH:
1049 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1050 break;
1051 case DMA_TLB_PSI_FLUSH:
1052 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1053 /* Note: always flush non-leaf currently */
1054 val_iva = size_order | addr;
1055 break;
1056 default:
1057 BUG();
1058 }
1059 /* Note: set drain read/write */
1060#if 0
1061 /*
1062 * This is probably to be super secure.. Looks like we can
1063 * ignore it without any impact.
1064 */
1065 if (cap_read_drain(iommu->cap))
1066 val |= DMA_TLB_READ_DRAIN;
1067#endif
1068 if (cap_write_drain(iommu->cap))
1069 val |= DMA_TLB_WRITE_DRAIN;
1070
1f5b3c3f 1071 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1072 /* Note: Only uses first TLB reg currently */
1073 if (val_iva)
1074 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1075 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1076
1077 /* Make sure hardware complete it */
1078 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1079 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1080
1f5b3c3f 1081 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1082
1083 /* check IOTLB invalidation granularity */
1084 if (DMA_TLB_IAIG(val) == 0)
1085 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1086 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1087 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1088 (unsigned long long)DMA_TLB_IIRG(type),
1089 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1090}
1091
93a23a72
YZ
1092static struct device_domain_info *iommu_support_dev_iotlb(
1093 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1094{
1095 int found = 0;
1096 unsigned long flags;
1097 struct device_domain_info *info;
1098 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1099
1100 if (!ecap_dev_iotlb_support(iommu->ecap))
1101 return NULL;
1102
1103 if (!iommu->qi)
1104 return NULL;
1105
1106 spin_lock_irqsave(&device_domain_lock, flags);
1107 list_for_each_entry(info, &domain->devices, link)
1108 if (info->bus == bus && info->devfn == devfn) {
1109 found = 1;
1110 break;
1111 }
1112 spin_unlock_irqrestore(&device_domain_lock, flags);
1113
1114 if (!found || !info->dev)
1115 return NULL;
1116
1117 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1118 return NULL;
1119
1120 if (!dmar_find_matched_atsr_unit(info->dev))
1121 return NULL;
1122
1123 info->iommu = iommu;
1124
1125 return info;
1126}
1127
1128static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1129{
93a23a72
YZ
1130 if (!info)
1131 return;
1132
1133 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1134}
1135
1136static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1137{
1138 if (!info->dev || !pci_ats_enabled(info->dev))
1139 return;
1140
1141 pci_disable_ats(info->dev);
1142}
1143
1144static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1145 u64 addr, unsigned mask)
1146{
1147 u16 sid, qdep;
1148 unsigned long flags;
1149 struct device_domain_info *info;
1150
1151 spin_lock_irqsave(&device_domain_lock, flags);
1152 list_for_each_entry(info, &domain->devices, link) {
1153 if (!info->dev || !pci_ats_enabled(info->dev))
1154 continue;
1155
1156 sid = info->bus << 8 | info->devfn;
1157 qdep = pci_ats_queue_depth(info->dev);
1158 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1159 }
1160 spin_unlock_irqrestore(&device_domain_lock, flags);
1161}
1162
1f0ef2aa 1163static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1164 unsigned long pfn, unsigned int pages, int map)
ba395927 1165{
9dd2fe89 1166 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1167 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1168
ba395927
KA
1169 BUG_ON(pages == 0);
1170
1171 /*
1172 * Fallback to domain selective flush if no PSI support or the size is
1173 * too big.
1174 * PSI requires page size to be 2 ^ x, and the base address is naturally
1175 * aligned to the size
1176 */
9dd2fe89
YZ
1177 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1178 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1179 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1180 else
1181 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1182 DMA_TLB_PSI_FLUSH);
1183
1184 /*
1185 * In caching mode, changes of pages from non-present to present require
1186 * flush. However, device IOTLB doesn't need to be flushed in this case.
1187 */
82653633 1188 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1189 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1190}
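
/*
 * Example: flushing 16 pages at pfn 0x1000 gives mask = ilog2(16) = 4, so
 * the page-selective invalidation covers a naturally aligned 64KiB window;
 * a mask larger than cap_max_amask_val() falls back to the coarser
 * domain-selective flush instead.
 */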
1191
f8bab735 1192static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1193{
1194 u32 pmen;
1195 unsigned long flags;
1196
1f5b3c3f 1197 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1198 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1199 pmen &= ~DMA_PMEN_EPM;
1200 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1201
1202 /* wait for the protected region status bit to clear */
1203 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1204 readl, !(pmen & DMA_PMEN_PRS), pmen);
1205
1f5b3c3f 1206 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1207}
1208
ba395927
KA
1209static int iommu_enable_translation(struct intel_iommu *iommu)
1210{
1211 u32 sts;
1212 unsigned long flags;
1213
1f5b3c3f 1214 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1215 iommu->gcmd |= DMA_GCMD_TE;
1216 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1217
1218 /* Make sure hardware complete it */
1219 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1220 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1221
1f5b3c3f 1222 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1223 return 0;
1224}
1225
1226static int iommu_disable_translation(struct intel_iommu *iommu)
1227{
1228 u32 sts;
1229 unsigned long flag;
1230
1f5b3c3f 1231 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1232 iommu->gcmd &= ~DMA_GCMD_TE;
1233 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1234
1235 /* Make sure hardware complete it */
1236 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1237 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1238
1f5b3c3f 1239 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1240 return 0;
1241}
1242
3460a6d9 1243
ba395927
KA
1244static int iommu_init_domains(struct intel_iommu *iommu)
1245{
1246 unsigned long ndomains;
1247 unsigned long nlongs;
1248
1249 ndomains = cap_ndoms(iommu->cap);
68aeb968 1250 pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
680a7524 1251 ndomains);
ba395927
KA
1252 nlongs = BITS_TO_LONGS(ndomains);
1253
94a91b50
DD
1254 spin_lock_init(&iommu->lock);
1255
ba395927
KA
1256 /* TBD: there might be 64K domains,
1257 * consider other allocation for future chip
1258 */
1259 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1260 if (!iommu->domain_ids) {
1261 printk(KERN_ERR "Allocating domain id array failed\n");
1262 return -ENOMEM;
1263 }
1264 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1265 GFP_KERNEL);
1266 if (!iommu->domains) {
1267 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1268 return -ENOMEM;
1269 }
1270
1271 /*
1272 * if Caching mode is set, then invalid translations are tagged
1273 * with domainid 0. Hence we need to pre-allocate it.
1274 */
1275 if (cap_caching_mode(iommu->cap))
1276 set_bit(0, iommu->domain_ids);
1277 return 0;
1278}
ba395927 1279
ba395927
KA
1280
1281static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1282static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1283
1284void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1285{
1286 struct dmar_domain *domain;
1287 int i;
c7151a8d 1288 unsigned long flags;
ba395927 1289
94a91b50 1290 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1291 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
94a91b50
DD
1292 domain = iommu->domains[i];
1293 clear_bit(i, iommu->domain_ids);
1294
1295 spin_lock_irqsave(&domain->iommu_lock, flags);
1296 if (--domain->iommu_count == 0) {
1297 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1298 vm_domain_exit(domain);
1299 else
1300 domain_exit(domain);
1301 }
1302 spin_unlock_irqrestore(&domain->iommu_lock, flags);
5e98c4b1 1303 }
ba395927
KA
1304 }
1305
1306 if (iommu->gcmd & DMA_GCMD_TE)
1307 iommu_disable_translation(iommu);
1308
1309 if (iommu->irq) {
dced35ae 1310 irq_set_handler_data(iommu->irq, NULL);
ba395927
KA
1311 /* This will mask the irq */
1312 free_irq(iommu->irq, iommu);
1313 destroy_irq(iommu->irq);
1314 }
1315
1316 kfree(iommu->domains);
1317 kfree(iommu->domain_ids);
1318
d9630fe9
WH
1319 g_iommus[iommu->seq_id] = NULL;
1320
1321 /* if all iommus are freed, free g_iommus */
1322 for (i = 0; i < g_num_of_iommus; i++) {
1323 if (g_iommus[i])
1324 break;
1325 }
1326
1327 if (i == g_num_of_iommus)
1328 kfree(g_iommus);
1329
ba395927
KA
1330 /* free context mapping */
1331 free_context_table(iommu);
ba395927
KA
1332}
1333
2c2e2c38 1334static struct dmar_domain *alloc_domain(void)
ba395927 1335{
ba395927 1336 struct dmar_domain *domain;
ba395927
KA
1337
1338 domain = alloc_domain_mem();
1339 if (!domain)
1340 return NULL;
1341
4c923d47 1342 domain->nid = -1;
1b198bb0 1343 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
2c2e2c38
FY
1344 domain->flags = 0;
1345
1346 return domain;
1347}
1348
1349static int iommu_attach_domain(struct dmar_domain *domain,
1350 struct intel_iommu *iommu)
1351{
1352 int num;
1353 unsigned long ndomains;
1354 unsigned long flags;
1355
ba395927
KA
1356 ndomains = cap_ndoms(iommu->cap);
1357
1358 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1359
ba395927
KA
1360 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1361 if (num >= ndomains) {
1362 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1363 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1364 return -ENOMEM;
ba395927
KA
1365 }
1366
ba395927 1367 domain->id = num;
2c2e2c38 1368 set_bit(num, iommu->domain_ids);
1b198bb0 1369 set_bit(iommu->seq_id, domain->iommu_bmp);
ba395927
KA
1370 iommu->domains[num] = domain;
1371 spin_unlock_irqrestore(&iommu->lock, flags);
1372
2c2e2c38 1373 return 0;
ba395927
KA
1374}
1375
2c2e2c38
FY
1376static void iommu_detach_domain(struct dmar_domain *domain,
1377 struct intel_iommu *iommu)
ba395927
KA
1378{
1379 unsigned long flags;
2c2e2c38
FY
1380 int num, ndomains;
1381 int found = 0;
ba395927 1382
8c11e798 1383 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1384 ndomains = cap_ndoms(iommu->cap);
a45946ab 1385 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38
FY
1386 if (iommu->domains[num] == domain) {
1387 found = 1;
1388 break;
1389 }
2c2e2c38
FY
1390 }
1391
1392 if (found) {
1393 clear_bit(num, iommu->domain_ids);
1b198bb0 1394 clear_bit(iommu->seq_id, domain->iommu_bmp);
2c2e2c38
FY
1395 iommu->domains[num] = NULL;
1396 }
8c11e798 1397 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1398}
1399
1400static struct iova_domain reserved_iova_list;
8a443df4 1401static struct lock_class_key reserved_rbtree_key;
ba395927 1402
51a63e67 1403static int dmar_init_reserved_ranges(void)
ba395927
KA
1404{
1405 struct pci_dev *pdev = NULL;
1406 struct iova *iova;
1407 int i;
ba395927 1408
f661197e 1409 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1410
8a443df4
MG
1411 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1412 &reserved_rbtree_key);
1413
ba395927
KA
1414 /* IOAPIC ranges shouldn't be accessed by DMA */
1415 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1416 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1417 if (!iova) {
ba395927 1418 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1419 return -ENODEV;
1420 }
ba395927
KA
1421
1422 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1423 for_each_pci_dev(pdev) {
1424 struct resource *r;
1425
1426 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1427 r = &pdev->resource[i];
1428 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1429 continue;
1a4a4551
DW
1430 iova = reserve_iova(&reserved_iova_list,
1431 IOVA_PFN(r->start),
1432 IOVA_PFN(r->end));
51a63e67 1433 if (!iova) {
ba395927 1434 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1435 return -ENODEV;
1436 }
ba395927
KA
1437 }
1438 }
51a63e67 1439 return 0;
ba395927
KA
1440}
1441
1442static void domain_reserve_special_ranges(struct dmar_domain *domain)
1443{
1444 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1445}
1446
1447static inline int guestwidth_to_adjustwidth(int gaw)
1448{
1449 int agaw;
1450 int r = (gaw - 12) % 9;
1451
1452 if (r == 0)
1453 agaw = gaw;
1454 else
1455 agaw = gaw + 9 - r;
1456 if (agaw > 64)
1457 agaw = 64;
1458 return agaw;
1459}
1460
1461static int domain_init(struct dmar_domain *domain, int guest_width)
1462{
1463 struct intel_iommu *iommu;
1464 int adjust_width, agaw;
1465 unsigned long sagaw;
1466
f661197e 1467 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1468 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1469
1470 domain_reserve_special_ranges(domain);
1471
1472 /* calculate AGAW */
8c11e798 1473 iommu = domain_get_iommu(domain);
ba395927
KA
1474 if (guest_width > cap_mgaw(iommu->cap))
1475 guest_width = cap_mgaw(iommu->cap);
1476 domain->gaw = guest_width;
1477 adjust_width = guestwidth_to_adjustwidth(guest_width);
1478 agaw = width_to_agaw(adjust_width);
1479 sagaw = cap_sagaw(iommu->cap);
1480 if (!test_bit(agaw, &sagaw)) {
1481 /* hardware doesn't support it, choose a bigger one */
1482 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1483 agaw = find_next_bit(&sagaw, 5, agaw);
1484 if (agaw >= 5)
1485 return -ENODEV;
1486 }
1487 domain->agaw = agaw;
1488 INIT_LIST_HEAD(&domain->devices);
1489
8e604097
WH
1490 if (ecap_coherent(iommu->ecap))
1491 domain->iommu_coherency = 1;
1492 else
1493 domain->iommu_coherency = 0;
1494
58c610bd
SY
1495 if (ecap_sc_support(iommu->ecap))
1496 domain->iommu_snooping = 1;
1497 else
1498 domain->iommu_snooping = 0;
1499
6dd9a7c7 1500 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
c7151a8d 1501 domain->iommu_count = 1;
4c923d47 1502 domain->nid = iommu->node;
c7151a8d 1503
ba395927 1504 /* always allocate the top pgd */
4c923d47 1505 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1506 if (!domain->pgd)
1507 return -ENOMEM;
5b6985ce 1508 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1509 return 0;
1510}
1511
1512static void domain_exit(struct dmar_domain *domain)
1513{
2c2e2c38
FY
1514 struct dmar_drhd_unit *drhd;
1515 struct intel_iommu *iommu;
ba395927
KA
1516
1517 /* Domain 0 is reserved, so don't process it */
1518 if (!domain)
1519 return;
1520
7b668357
AW
1521 /* Flush any lazy unmaps that may reference this domain */
1522 if (!intel_iommu_strict)
1523 flush_unmaps_timeout(0);
1524
ba395927
KA
1525 domain_remove_dev_info(domain);
1526 /* destroy iovas */
1527 put_iova_domain(&domain->iovad);
ba395927
KA
1528
1529 /* clear ptes */
595badf5 1530 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1531
1532 /* free page tables */
d794dc9b 1533 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1534
2c2e2c38 1535 for_each_active_iommu(iommu, drhd)
1b198bb0 1536 if (test_bit(iommu->seq_id, domain->iommu_bmp))
2c2e2c38
FY
1537 iommu_detach_domain(domain, iommu);
1538
ba395927
KA
1539 free_domain_mem(domain);
1540}
1541
4ed0d3e6
FY
1542static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1543 u8 bus, u8 devfn, int translation)
ba395927
KA
1544{
1545 struct context_entry *context;
ba395927 1546 unsigned long flags;
5331fe6f 1547 struct intel_iommu *iommu;
ea6606b0
WH
1548 struct dma_pte *pgd;
1549 unsigned long num;
1550 unsigned long ndomains;
1551 int id;
1552 int agaw;
93a23a72 1553 struct device_domain_info *info = NULL;
ba395927
KA
1554
1555 pr_debug("Set context mapping for %02x:%02x.%d\n",
1556 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1557
ba395927 1558 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1559 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1560 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1561
276dbf99 1562 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1563 if (!iommu)
1564 return -ENODEV;
1565
ba395927
KA
1566 context = device_to_context_entry(iommu, bus, devfn);
1567 if (!context)
1568 return -ENOMEM;
1569 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1570 if (context_present(context)) {
ba395927
KA
1571 spin_unlock_irqrestore(&iommu->lock, flags);
1572 return 0;
1573 }
1574
ea6606b0
WH
1575 id = domain->id;
1576 pgd = domain->pgd;
1577
2c2e2c38
FY
1578 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1579 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1580 int found = 0;
1581
1582 /* find an available domain id for this device in iommu */
1583 ndomains = cap_ndoms(iommu->cap);
a45946ab 1584 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1585 if (iommu->domains[num] == domain) {
1586 id = num;
1587 found = 1;
1588 break;
1589 }
ea6606b0
WH
1590 }
1591
1592 if (found == 0) {
1593 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1594 if (num >= ndomains) {
1595 spin_unlock_irqrestore(&iommu->lock, flags);
1596 printk(KERN_ERR "IOMMU: no free domain ids\n");
1597 return -EFAULT;
1598 }
1599
1600 set_bit(num, iommu->domain_ids);
1601 iommu->domains[num] = domain;
1602 id = num;
1603 }
1604
1605 /* Skip top levels of page tables for
1606 * iommu which has less agaw than default.
1672af11 1607 * Unnecessary for PT mode.
ea6606b0 1608 */
1672af11
CW
1609 if (translation != CONTEXT_TT_PASS_THROUGH) {
1610 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1611 pgd = phys_to_virt(dma_pte_addr(pgd));
1612 if (!dma_pte_present(pgd)) {
1613 spin_unlock_irqrestore(&iommu->lock, flags);
1614 return -ENOMEM;
1615 }
ea6606b0
WH
1616 }
1617 }
1618 }
1619
1620 context_set_domain_id(context, id);
4ed0d3e6 1621
93a23a72
YZ
1622 if (translation != CONTEXT_TT_PASS_THROUGH) {
1623 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1624 translation = info ? CONTEXT_TT_DEV_IOTLB :
1625 CONTEXT_TT_MULTI_LEVEL;
1626 }
4ed0d3e6
FY
1627 /*
1628 * In pass through mode, AW must be programmed to indicate the largest
1629 * AGAW value supported by hardware. And ASR is ignored by hardware.
1630 */
93a23a72 1631 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1632 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1633 else {
1634 context_set_address_root(context, virt_to_phys(pgd));
1635 context_set_address_width(context, iommu->agaw);
1636 }
4ed0d3e6
FY
1637
1638 context_set_translation_type(context, translation);
c07e7d21
MM
1639 context_set_fault_enable(context);
1640 context_set_present(context);
5331fe6f 1641 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1642
4c25a2c1
DW
1643 /*
1644 * It's a non-present to present mapping. If hardware doesn't cache
1645 * non-present entry we only need to flush the write-buffer. If it
1646 * _does_ cache non-present entries, then it does so in the special
1647 * domain #0, which we have to flush:
1648 */
1649 if (cap_caching_mode(iommu->cap)) {
1650 iommu->flush.flush_context(iommu, 0,
1651 (((u16)bus) << 8) | devfn,
1652 DMA_CCMD_MASK_NOBIT,
1653 DMA_CCMD_DEVICE_INVL);
82653633 1654 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1655 } else {
ba395927 1656 iommu_flush_write_buffer(iommu);
4c25a2c1 1657 }
93a23a72 1658 iommu_enable_dev_iotlb(info);
ba395927 1659 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1660
1661 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1662 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1663 domain->iommu_count++;
4c923d47
SS
1664 if (domain->iommu_count == 1)
1665 domain->nid = iommu->node;
58c610bd 1666 domain_update_iommu_cap(domain);
c7151a8d
WH
1667 }
1668 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1669 return 0;
1670}
1671
1672static int
4ed0d3e6
FY
1673domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1674 int translation)
ba395927
KA
1675{
1676 int ret;
1677 struct pci_dev *tmp, *parent;
1678
276dbf99 1679 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1680 pdev->bus->number, pdev->devfn,
1681 translation);
ba395927
KA
1682 if (ret)
1683 return ret;
1684
1685 /* dependent device mapping */
1686 tmp = pci_find_upstream_pcie_bridge(pdev);
1687 if (!tmp)
1688 return 0;
1689 /* Secondary interface's bus number and devfn 0 */
1690 parent = pdev->bus->self;
1691 while (parent != tmp) {
276dbf99
DW
1692 ret = domain_context_mapping_one(domain,
1693 pci_domain_nr(parent->bus),
1694 parent->bus->number,
4ed0d3e6 1695 parent->devfn, translation);
ba395927
KA
1696 if (ret)
1697 return ret;
1698 parent = parent->bus->self;
1699 }
45e829ea 1700 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1701 return domain_context_mapping_one(domain,
276dbf99 1702 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1703 tmp->subordinate->number, 0,
1704 translation);
ba395927
KA
1705 else /* this is a legacy PCI bridge */
1706 return domain_context_mapping_one(domain,
276dbf99
DW
1707 pci_domain_nr(tmp->bus),
1708 tmp->bus->number,
4ed0d3e6
FY
1709 tmp->devfn,
1710 translation);
ba395927
KA
1711}
1712
5331fe6f 1713static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1714{
1715 int ret;
1716 struct pci_dev *tmp, *parent;
5331fe6f
WH
1717 struct intel_iommu *iommu;
1718
276dbf99
DW
1719 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1720 pdev->devfn);
5331fe6f
WH
1721 if (!iommu)
1722 return -ENODEV;
ba395927 1723
276dbf99 1724 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1725 if (!ret)
1726 return ret;
1727 /* dependent device mapping */
1728 tmp = pci_find_upstream_pcie_bridge(pdev);
1729 if (!tmp)
1730 return ret;
1731 /* Secondary interface's bus number and devfn 0 */
1732 parent = pdev->bus->self;
1733 while (parent != tmp) {
8c11e798 1734 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1735 parent->devfn);
ba395927
KA
1736 if (!ret)
1737 return ret;
1738 parent = parent->bus->self;
1739 }
5f4d91a1 1740 if (pci_is_pcie(tmp))
276dbf99
DW
1741 return device_context_mapped(iommu, tmp->subordinate->number,
1742 0);
ba395927 1743 else
276dbf99
DW
1744 return device_context_mapped(iommu, tmp->bus->number,
1745 tmp->devfn);
ba395927
KA
1746}
1747
f532959b
FY
1748/* Returns a number of VTD pages, but aligned to MM page size */
1749static inline unsigned long aligned_nrpages(unsigned long host_addr,
1750 size_t size)
1751{
1752 host_addr &= ~PAGE_MASK;
1753 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1754}
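
/*
 * Example: host_addr 0x1200 and size 0x1000 with 4KiB MM pages: the offset
 * within the page is 0x200, PAGE_ALIGN(0x200 + 0x1000) is 0x2000, so the
 * request spans two VT-d pages even though it is only one page long.
 */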
1755
6dd9a7c7
YS
1756/* Return largest possible superpage level for a given mapping */
1757static inline int hardware_largepage_caps(struct dmar_domain *domain,
1758 unsigned long iov_pfn,
1759 unsigned long phy_pfn,
1760 unsigned long pages)
1761{
1762 int support, level = 1;
1763 unsigned long pfnmerge;
1764
1765 support = domain->iommu_superpage;
1766
1767 /* To use a large page, the virtual *and* physical addresses
1768 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1769 of them will mean we have to use smaller pages. So just
1770 merge them and check both at once. */
1771 pfnmerge = iov_pfn | phy_pfn;
1772
1773 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1774 pages >>= VTD_STRIDE_SHIFT;
1775 if (!pages)
1776 break;
1777 pfnmerge >>= VTD_STRIDE_SHIFT;
1778 level++;
1779 support--;
1780 }
1781 return level;
1782}
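
/*
 * Example: with domain->iommu_superpage >= 1, an iov_pfn and phy_pfn that
 * are both 2MiB aligned (low 9 pfn bits clear) and at least 512 pages to
 * map make the loop above return level 2, letting __domain_mapping() below
 * use a single 2MiB PTE; misalignment in either address drops it back to
 * 4KiB mappings.
 */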
1783
9051aa02
DW
1784static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1785 struct scatterlist *sg, unsigned long phys_pfn,
1786 unsigned long nr_pages, int prot)
e1605495
DW
1787{
1788 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1789 phys_addr_t uninitialized_var(pteval);
e1605495 1790 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1791 unsigned long sg_res;
6dd9a7c7
YS
1792 unsigned int largepage_lvl = 0;
1793 unsigned long lvl_pages = 0;
e1605495
DW
1794
1795 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1796
1797 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1798 return -EINVAL;
1799
1800 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1801
9051aa02
DW
1802 if (sg)
1803 sg_res = 0;
1804 else {
1805 sg_res = nr_pages + 1;
1806 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1807 }
1808
6dd9a7c7 1809 while (nr_pages > 0) {
c85994e4
DW
1810 uint64_t tmp;
1811
e1605495 1812 if (!sg_res) {
f532959b 1813 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1814 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1815 sg->dma_length = sg->length;
1816 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1817 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1818 }
6dd9a7c7 1819
e1605495 1820 if (!pte) {
6dd9a7c7
YS
1821 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1822
1823 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
e1605495
DW
1824 if (!pte)
1825 return -ENOMEM;
6dd9a7c7
YS
1826 /* It is a large page */
1827 if (largepage_lvl > 1)
1828 pteval |= DMA_PTE_LARGE_PAGE;
1829 else
1830 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1831
e1605495
DW
1832 }
1833 /* We don't need a lock here; nobody else
1834 * touches the iova range
1835 */
7766a3fb 1836 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1837 if (tmp) {
1bf20f0d 1838 static int dumps = 5;
c85994e4
DW
1839 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1840 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1841 if (dumps) {
1842 dumps--;
1843 debug_dma_dump_mappings(NULL);
1844 }
1845 WARN_ON(1);
1846 }
6dd9a7c7
YS
1847
1848 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1849
1850 BUG_ON(nr_pages < lvl_pages);
1851 BUG_ON(sg_res < lvl_pages);
1852
1853 nr_pages -= lvl_pages;
1854 iov_pfn += lvl_pages;
1855 phys_pfn += lvl_pages;
1856 pteval += lvl_pages * VTD_PAGE_SIZE;
1857 sg_res -= lvl_pages;
1858
1859 /* If the next PTE would be the first in a new page, then we
1860 need to flush the cache on the entries we've just written.
1861 And then we'll need to recalculate 'pte', so clear it and
1862 let it get set again in the if (!pte) block above.
1863
1864 If we're done (!nr_pages) we need to flush the cache too.
1865
1866 Also if we've been setting superpages, we may need to
1867 recalculate 'pte' and switch back to smaller pages for the
1868 end of the mapping, if the trailing size is not enough to
1869 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 1870 pte++;
6dd9a7c7
YS
1871 if (!nr_pages || first_pte_in_page(pte) ||
1872 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
1873 domain_flush_cache(domain, first_pte,
1874 (void *)pte - (void *)first_pte);
1875 pte = NULL;
1876 }
6dd9a7c7
YS
1877
1878 if (!sg_res && nr_pages)
e1605495
DW
1879 sg = sg_next(sg);
1880 }
1881 return 0;
1882}
1883
9051aa02
DW
1884static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1885 struct scatterlist *sg, unsigned long nr_pages,
1886 int prot)
ba395927 1887{
9051aa02
DW
1888 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1889}
6f6a00e4 1890
9051aa02
DW
1891static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1892 unsigned long phys_pfn, unsigned long nr_pages,
1893 int prot)
1894{
1895 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1896}
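/*
 * Usage sketch (illustrative): callers with a physically contiguous
 * range (e.g. iommu_domain_identity_map() and __intel_map_single()
 * below) go through domain_pfn_mapping() with a starting phys_pfn,
 * while the scatter-gather DMA path (intel_map_sg()) hands its
 * scatterlist to domain_sg_mapping() and lets __domain_mapping() walk
 * the chain via sg_next().
 */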
1897
c7151a8d 1898static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1899{
c7151a8d
WH
1900 if (!iommu)
1901 return;
8c11e798
WH
1902
1903 clear_context_table(iommu, bus, devfn);
1904 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1905 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1906 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1907}
1908
1909static void domain_remove_dev_info(struct dmar_domain *domain)
1910{
1911 struct device_domain_info *info;
1912 unsigned long flags;
c7151a8d 1913 struct intel_iommu *iommu;
ba395927
KA
1914
1915 spin_lock_irqsave(&device_domain_lock, flags);
1916 while (!list_empty(&domain->devices)) {
1917 info = list_entry(domain->devices.next,
1918 struct device_domain_info, link);
1919 list_del(&info->link);
1920 list_del(&info->global);
1921 if (info->dev)
358dd8ac 1922 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1923 spin_unlock_irqrestore(&device_domain_lock, flags);
1924
93a23a72 1925 iommu_disable_dev_iotlb(info);
276dbf99 1926 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1927 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1928 free_devinfo_mem(info);
1929
1930 spin_lock_irqsave(&device_domain_lock, flags);
1931 }
1932 spin_unlock_irqrestore(&device_domain_lock, flags);
1933}
1934
1935/*
1936 * find_domain
358dd8ac 1937 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1938 */
38717946 1939static struct dmar_domain *
ba395927
KA
1940find_domain(struct pci_dev *pdev)
1941{
1942 struct device_domain_info *info;
1943
1944 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1945 info = pdev->dev.archdata.iommu;
ba395927
KA
1946 if (info)
1947 return info->domain;
1948 return NULL;
1949}
1950
ba395927
KA
1951/* domain is initialized */
1952static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1953{
1954 struct dmar_domain *domain, *found = NULL;
1955 struct intel_iommu *iommu;
1956 struct dmar_drhd_unit *drhd;
1957 struct device_domain_info *info, *tmp;
1958 struct pci_dev *dev_tmp;
1959 unsigned long flags;
1960 int bus = 0, devfn = 0;
276dbf99 1961 int segment;
2c2e2c38 1962 int ret;
ba395927
KA
1963
1964 domain = find_domain(pdev);
1965 if (domain)
1966 return domain;
1967
276dbf99
DW
1968 segment = pci_domain_nr(pdev->bus);
1969
ba395927
KA
1970 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1971 if (dev_tmp) {
5f4d91a1 1972 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1973 bus = dev_tmp->subordinate->number;
1974 devfn = 0;
1975 } else {
1976 bus = dev_tmp->bus->number;
1977 devfn = dev_tmp->devfn;
1978 }
1979 spin_lock_irqsave(&device_domain_lock, flags);
1980 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1981 if (info->segment == segment &&
1982 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1983 found = info->domain;
1984 break;
1985 }
1986 }
1987 spin_unlock_irqrestore(&device_domain_lock, flags);
1988 /* pcie-pci bridge already has a domain, use it */
1989 if (found) {
1990 domain = found;
1991 goto found_domain;
1992 }
1993 }
1994
2c2e2c38
FY
1995 domain = alloc_domain();
1996 if (!domain)
1997 goto error;
1998
ba395927
KA
1999 /* Allocate new domain for the device */
2000 drhd = dmar_find_matched_drhd_unit(pdev);
2001 if (!drhd) {
2002 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2003 pci_name(pdev));
2004 return NULL;
2005 }
2006 iommu = drhd->iommu;
2007
2c2e2c38
FY
2008 ret = iommu_attach_domain(domain, iommu);
2009 if (ret) {
2fe9723d 2010 free_domain_mem(domain);
ba395927 2011 goto error;
2c2e2c38 2012 }
ba395927
KA
2013
2014 if (domain_init(domain, gaw)) {
2015 domain_exit(domain);
2016 goto error;
2017 }
2018
2019 /* register pcie-to-pci device */
2020 if (dev_tmp) {
2021 info = alloc_devinfo_mem();
2022 if (!info) {
2023 domain_exit(domain);
2024 goto error;
2025 }
276dbf99 2026 info->segment = segment;
ba395927
KA
2027 info->bus = bus;
2028 info->devfn = devfn;
2029 info->dev = NULL;
2030 info->domain = domain;
2031 /* This domain is shared by devices under p2p bridge */
3b5410e7 2032 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
2033
2034 /* pcie-to-pci bridge already has a domain, use it */
2035 found = NULL;
2036 spin_lock_irqsave(&device_domain_lock, flags);
2037 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
2038 if (tmp->segment == segment &&
2039 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
2040 found = tmp->domain;
2041 break;
2042 }
2043 }
2044 if (found) {
00dfff77 2045 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
2046 free_devinfo_mem(info);
2047 domain_exit(domain);
2048 domain = found;
2049 } else {
2050 list_add(&info->link, &domain->devices);
2051 list_add(&info->global, &device_domain_list);
00dfff77 2052 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2053 }
ba395927
KA
2054 }
2055
2056found_domain:
2057 info = alloc_devinfo_mem();
2058 if (!info)
2059 goto error;
276dbf99 2060 info->segment = segment;
ba395927
KA
2061 info->bus = pdev->bus->number;
2062 info->devfn = pdev->devfn;
2063 info->dev = pdev;
2064 info->domain = domain;
2065 spin_lock_irqsave(&device_domain_lock, flags);
2066 /* somebody is fast */
2067 found = find_domain(pdev);
2068 if (found != NULL) {
2069 spin_unlock_irqrestore(&device_domain_lock, flags);
2070 if (found != domain) {
2071 domain_exit(domain);
2072 domain = found;
2073 }
2074 free_devinfo_mem(info);
2075 return domain;
2076 }
2077 list_add(&info->link, &domain->devices);
2078 list_add(&info->global, &device_domain_list);
358dd8ac 2079 pdev->dev.archdata.iommu = info;
ba395927
KA
2080 spin_unlock_irqrestore(&device_domain_lock, flags);
2081 return domain;
2082error:
2083 /* recheck it here, maybe others set it */
2084 return find_domain(pdev);
2085}
2086
2c2e2c38 2087static int iommu_identity_mapping;
e0fc7e0b
DW
2088#define IDENTMAP_ALL 1
2089#define IDENTMAP_GFX 2
2090#define IDENTMAP_AZALIA 4
2c2e2c38 2091
b213203e
DW
2092static int iommu_domain_identity_map(struct dmar_domain *domain,
2093 unsigned long long start,
2094 unsigned long long end)
ba395927 2095{
c5395d5c
DW
2096 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2097 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2098
2099 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2100 dma_to_mm_pfn(last_vpfn))) {
ba395927 2101 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2102 return -ENOMEM;
ba395927
KA
2103 }
2104
c5395d5c
DW
2105 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2106 start, end, domain->id);
ba395927
KA
2107 /*
2108 * RMRR range might overlap with physical memory range,
2109 * clear it first
2110 */
c5395d5c 2111 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2112
c5395d5c
DW
2113 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2114 last_vpfn - first_vpfn + 1,
61df7443 2115 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2116}
2117
2118static int iommu_prepare_identity_map(struct pci_dev *pdev,
2119 unsigned long long start,
2120 unsigned long long end)
2121{
2122 struct dmar_domain *domain;
2123 int ret;
2124
c7ab48d2 2125 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2126 if (!domain)
2127 return -ENOMEM;
2128
19943b0e
DW
2129 /* For _hardware_ passthrough, don't bother. But for software
2130 passthrough, we do it anyway -- it may indicate a memory
2131 range which is reserved in E820, and so didn't get set
2132 up in si_domain to start with */
2133 if (domain == si_domain && hw_pass_through) {
2134 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2135 pci_name(pdev), start, end);
2136 return 0;
2137 }
2138
2139 printk(KERN_INFO
2140 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2141 pci_name(pdev), start, end);
2ff729f5 2142
5595b528
DW
2143 if (end < start) {
2144 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2145 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2146 dmi_get_system_info(DMI_BIOS_VENDOR),
2147 dmi_get_system_info(DMI_BIOS_VERSION),
2148 dmi_get_system_info(DMI_PRODUCT_VERSION));
2149 ret = -EIO;
2150 goto error;
2151 }
2152
2ff729f5
DW
2153 if (end >> agaw_to_width(domain->agaw)) {
2154 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2155 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2156 agaw_to_width(domain->agaw),
2157 dmi_get_system_info(DMI_BIOS_VENDOR),
2158 dmi_get_system_info(DMI_BIOS_VERSION),
2159 dmi_get_system_info(DMI_PRODUCT_VERSION));
2160 ret = -EIO;
2161 goto error;
2162 }
19943b0e 2163
b213203e 2164 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2165 if (ret)
2166 goto error;
2167
2168 /* context entry init */
4ed0d3e6 2169 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2170 if (ret)
2171 goto error;
2172
2173 return 0;
2174
2175 error:
ba395927
KA
2176 domain_exit(domain);
2177 return ret;
ba395927
KA
2178}
2179
2180static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2181 struct pci_dev *pdev)
2182{
358dd8ac 2183 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2184 return 0;
2185 return iommu_prepare_identity_map(pdev, rmrr->base_address,
70e535d1 2186 rmrr->end_address);
ba395927
KA
2187}
2188
d3f13810 2189#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2190static inline void iommu_prepare_isa(void)
2191{
2192 struct pci_dev *pdev;
2193 int ret;
2194
2195 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2196 if (!pdev)
2197 return;
2198
c7ab48d2 2199 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
70e535d1 2200 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
49a0429e
KA
2201
2202 if (ret)
c7ab48d2
DW
2203 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2204 "floppy might not work\n");
49a0429e
KA
2205
2206}
2207#else
2208static inline void iommu_prepare_isa(void)
2209{
2210 return;
2211}
d3f13810 2212#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2213
2c2e2c38 2214static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2215
071e1374 2216static int __init si_domain_init(int hw)
2c2e2c38
FY
2217{
2218 struct dmar_drhd_unit *drhd;
2219 struct intel_iommu *iommu;
c7ab48d2 2220 int nid, ret = 0;
2c2e2c38
FY
2221
2222 si_domain = alloc_domain();
2223 if (!si_domain)
2224 return -EFAULT;
2225
c7ab48d2 2226 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2227
2228 for_each_active_iommu(iommu, drhd) {
2229 ret = iommu_attach_domain(si_domain, iommu);
2230 if (ret) {
2231 domain_exit(si_domain);
2232 return -EFAULT;
2233 }
2234 }
2235
2236 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2237 domain_exit(si_domain);
2238 return -EFAULT;
2239 }
2240
2241 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2242
19943b0e
DW
2243 if (hw)
2244 return 0;
2245
c7ab48d2 2246 for_each_online_node(nid) {
5dfe8660
TH
2247 unsigned long start_pfn, end_pfn;
2248 int i;
2249
2250 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2251 ret = iommu_domain_identity_map(si_domain,
2252 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2253 if (ret)
2254 return ret;
2255 }
c7ab48d2
DW
2256 }
2257
2c2e2c38
FY
2258 return 0;
2259}
2260
2261static void domain_remove_one_dev_info(struct dmar_domain *domain,
2262 struct pci_dev *pdev);
2263static int identity_mapping(struct pci_dev *pdev)
2264{
2265 struct device_domain_info *info;
2266
2267 if (likely(!iommu_identity_mapping))
2268 return 0;
2269
cb452a40
MT
2270 info = pdev->dev.archdata.iommu;
2271 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2272 return (info->domain == si_domain);
2c2e2c38 2273
2c2e2c38
FY
2274 return 0;
2275}
2276
2277static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2278 struct pci_dev *pdev,
2279 int translation)
2c2e2c38
FY
2280{
2281 struct device_domain_info *info;
2282 unsigned long flags;
5fe60f4e 2283 int ret;
2c2e2c38
FY
2284
2285 info = alloc_devinfo_mem();
2286 if (!info)
2287 return -ENOMEM;
2288
5fe60f4e
DW
2289 ret = domain_context_mapping(domain, pdev, translation);
2290 if (ret) {
2291 free_devinfo_mem(info);
2292 return ret;
2293 }
2294
2c2e2c38
FY
2295 info->segment = pci_domain_nr(pdev->bus);
2296 info->bus = pdev->bus->number;
2297 info->devfn = pdev->devfn;
2298 info->dev = pdev;
2299 info->domain = domain;
2300
2301 spin_lock_irqsave(&device_domain_lock, flags);
2302 list_add(&info->link, &domain->devices);
2303 list_add(&info->global, &device_domain_list);
2304 pdev->dev.archdata.iommu = info;
2305 spin_unlock_irqrestore(&device_domain_lock, flags);
2306
2307 return 0;
2308}
2309
6941af28
DW
2310static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2311{
e0fc7e0b
DW
2312 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2313 return 1;
2314
2315 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2316 return 1;
2317
2318 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2319 return 0;
6941af28 2320
3dfc813d
DW
2321 /*
2322 * We want to start off with all devices in the 1:1 domain, and
2323 * take them out later if we find they can't access all of memory.
2324 *
2325 * However, we can't do this for PCI devices behind bridges,
2326 * because all PCI devices behind the same bridge will end up
2327 * with the same source-id on their transactions.
2328 *
2329 * Practically speaking, we can't change things around for these
2330 * devices at run-time, because we can't be sure there'll be no
2331 * DMA transactions in flight for any of their siblings.
2332 *
2333 * So PCI devices (unless they're on the root bus) as well as
2334 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2335 * the 1:1 domain, just in _case_ one of their siblings turns out
2336 * not to be able to map all of memory.
2337 */
5f4d91a1 2338 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2339 if (!pci_is_root_bus(pdev->bus))
2340 return 0;
2341 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2342 return 0;
2343 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2344 return 0;
2345
2346 /*
2347 * At boot time, we don't yet know if devices will be 64-bit capable.
2348 * Assume that they will -- if they turn out not to be, then we can
2349 * take them out of the 1:1 domain later.
2350 */
8fcc5372
CW
2351 if (!startup) {
2352 /*
2353 * If the device's dma_mask is less than the system's memory
2354 * size then this is not a candidate for identity mapping.
2355 */
2356 u64 dma_mask = pdev->dma_mask;
2357
2358 if (pdev->dev.coherent_dma_mask &&
2359 pdev->dev.coherent_dma_mask < dma_mask)
2360 dma_mask = pdev->dev.coherent_dma_mask;
2361
2362 return dma_mask >= dma_get_required_mask(&pdev->dev);
2363 }
6941af28
DW
2364
2365 return 1;
2366}
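/*
 * Illustrative consequence of the dma_mask check above: on a machine
 * whose highest RAM address needs more than 32 bits, a device that
 * advertises only a 32-bit dma_mask fails the dma_get_required_mask()
 * comparison once startup is over, so iommu_no_mapping() below pulls
 * it back out of the 1:1 domain and it gets translated mappings
 * instead.
 */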
2367
071e1374 2368static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2369{
2c2e2c38
FY
2370 struct pci_dev *pdev = NULL;
2371 int ret;
2372
19943b0e 2373 ret = si_domain_init(hw);
2c2e2c38
FY
2374 if (ret)
2375 return -EFAULT;
2376
2c2e2c38 2377 for_each_pci_dev(pdev) {
6941af28 2378 if (iommu_should_identity_map(pdev, 1)) {
5fe60f4e 2379 ret = domain_add_dev_info(si_domain, pdev,
eae460b6
MT
2380 hw ? CONTEXT_TT_PASS_THROUGH :
2381 CONTEXT_TT_MULTI_LEVEL);
2382 if (ret) {
2383 /* device not associated with an iommu */
2384 if (ret == -ENODEV)
2385 continue;
62edf5dc 2386 return ret;
eae460b6
MT
2387 }
2388 pr_info("IOMMU: %s identity mapping for device %s\n",
2389 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2390 }
2c2e2c38
FY
2391 }
2392
2393 return 0;
2394}
2395
b779260b 2396static int __init init_dmars(void)
ba395927
KA
2397{
2398 struct dmar_drhd_unit *drhd;
2399 struct dmar_rmrr_unit *rmrr;
2400 struct pci_dev *pdev;
2401 struct intel_iommu *iommu;
9d783ba0 2402 int i, ret;
2c2e2c38 2403
ba395927
KA
2404 /*
2405 * for each drhd
2406 * allocate root
2407 * initialize and program root entry to not present
2408 * endfor
2409 */
2410 for_each_drhd_unit(drhd) {
5e0d2a6f 2411 /*
2412 * lock not needed as this is only incremented in the single-
2413 * threaded kernel __init code path; all other accesses are
2414 * read only
2415 */
1b198bb0
MT
2416 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2417 g_num_of_iommus++;
2418 continue;
2419 }
2420 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2421 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2422 }
2423
d9630fe9
WH
2424 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2425 GFP_KERNEL);
2426 if (!g_iommus) {
2427 printk(KERN_ERR "Allocating global iommu array failed\n");
2428 ret = -ENOMEM;
2429 goto error;
2430 }
2431
80b20dd8 2432 deferred_flush = kzalloc(g_num_of_iommus *
2433 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2434 if (!deferred_flush) {
5e0d2a6f 2435 ret = -ENOMEM;
2436 goto error;
2437 }
2438
5e0d2a6f 2439 for_each_drhd_unit(drhd) {
2440 if (drhd->ignored)
2441 continue;
1886e8a9
SS
2442
2443 iommu = drhd->iommu;
d9630fe9 2444 g_iommus[iommu->seq_id] = iommu;
ba395927 2445
e61d98d8
SS
2446 ret = iommu_init_domains(iommu);
2447 if (ret)
2448 goto error;
2449
ba395927
KA
2450 /*
2451 * TBD:
2452 * we could share the same root & context tables
25985edc 2453 * among all IOMMUs. Need to split it later.
ba395927
KA
2454 */
2455 ret = iommu_alloc_root_entry(iommu);
2456 if (ret) {
2457 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2458 goto error;
2459 }
4ed0d3e6 2460 if (!ecap_pass_through(iommu->ecap))
19943b0e 2461 hw_pass_through = 0;
ba395927
KA
2462 }
2463
1531a6a6
SS
2464 /*
2465 * Start from the sane iommu hardware state.
2466 */
a77b67d4
YS
2467 for_each_drhd_unit(drhd) {
2468 if (drhd->ignored)
2469 continue;
2470
2471 iommu = drhd->iommu;
1531a6a6
SS
2472
2473 /*
2474 * If the queued invalidation is already initialized by us
2475 * (for example, while enabling interrupt-remapping) then
2476 * we got the things already rolling from a sane state.
2477 */
2478 if (iommu->qi)
2479 continue;
2480
2481 /*
2482 * Clear any previous faults.
2483 */
2484 dmar_fault(-1, iommu);
2485 /*
2486 * Disable queued invalidation if supported and already enabled
2487 * before OS handover.
2488 */
2489 dmar_disable_qi(iommu);
2490 }
2491
2492 for_each_drhd_unit(drhd) {
2493 if (drhd->ignored)
2494 continue;
2495
2496 iommu = drhd->iommu;
2497
a77b67d4
YS
2498 if (dmar_enable_qi(iommu)) {
2499 /*
2500 * Queued Invalidate not enabled, use Register Based
2501 * Invalidate
2502 */
2503 iommu->flush.flush_context = __iommu_flush_context;
2504 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2505 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2506 "invalidation\n",
680a7524 2507 iommu->seq_id,
b4e0f9eb 2508 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2509 } else {
2510 iommu->flush.flush_context = qi_flush_context;
2511 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2512 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2513 "invalidation\n",
680a7524 2514 iommu->seq_id,
b4e0f9eb 2515 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2516 }
2517 }
2518
19943b0e 2519 if (iommu_pass_through)
e0fc7e0b
DW
2520 iommu_identity_mapping |= IDENTMAP_ALL;
2521
d3f13810 2522#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2523 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2524#endif
e0fc7e0b
DW
2525
2526 check_tylersburg_isoch();
2527
ba395927 2528 /*
19943b0e
DW
2529 * If pass-through is not set or not enabled, set up context entries for
2530 * identity mappings for rmrr, gfx, and isa, possibly falling back to
2531 * static identity mapping if iommu_identity_mapping is set.
ba395927 2532 */
19943b0e
DW
2533 if (iommu_identity_mapping) {
2534 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2535 if (ret) {
19943b0e
DW
2536 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2537 goto error;
ba395927
KA
2538 }
2539 }
ba395927 2540 /*
19943b0e
DW
2541 * For each rmrr
2542 * for each dev attached to rmrr
2543 * do
2544 * locate drhd for dev, alloc domain for dev
2545 * allocate free domain
2546 * allocate page table entries for rmrr
2547 * if context not allocated for bus
2548 * allocate and init context
2549 * set present in root table for this bus
2550 * init context with domain, translation etc
2551 * endfor
2552 * endfor
ba395927 2553 */
19943b0e
DW
2554 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2555 for_each_rmrr_units(rmrr) {
2556 for (i = 0; i < rmrr->devices_cnt; i++) {
2557 pdev = rmrr->devices[i];
2558 /*
2559 * some BIOSes list non-existent devices in the
2560 * DMAR table.
2561 */
2562 if (!pdev)
2563 continue;
2564 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2565 if (ret)
2566 printk(KERN_ERR
2567 "IOMMU: mapping reserved region failed\n");
ba395927 2568 }
4ed0d3e6 2569 }
49a0429e 2570
19943b0e
DW
2571 iommu_prepare_isa();
2572
ba395927
KA
2573 /*
2574 * for each drhd
2575 * enable fault log
2576 * global invalidate context cache
2577 * global invalidate iotlb
2578 * enable translation
2579 */
2580 for_each_drhd_unit(drhd) {
51a63e67
JC
2581 if (drhd->ignored) {
2582 /*
2583 * we always have to disable PMRs or DMA may fail on
2584 * this device
2585 */
2586 if (force_on)
2587 iommu_disable_protect_mem_regions(drhd->iommu);
ba395927 2588 continue;
51a63e67 2589 }
ba395927 2590 iommu = drhd->iommu;
ba395927
KA
2591
2592 iommu_flush_write_buffer(iommu);
2593
3460a6d9
KA
2594 ret = dmar_set_interrupt(iommu);
2595 if (ret)
2596 goto error;
2597
ba395927
KA
2598 iommu_set_root_entry(iommu);
2599
4c25a2c1 2600 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2601 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2602
ba395927
KA
2603 ret = iommu_enable_translation(iommu);
2604 if (ret)
2605 goto error;
b94996c9
DW
2606
2607 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2608 }
2609
2610 return 0;
2611error:
2612 for_each_drhd_unit(drhd) {
2613 if (drhd->ignored)
2614 continue;
2615 iommu = drhd->iommu;
2616 free_iommu(iommu);
2617 }
d9630fe9 2618 kfree(g_iommus);
ba395927
KA
2619 return ret;
2620}
2621
5a5e02a6 2622/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2623static struct iova *intel_alloc_iova(struct device *dev,
2624 struct dmar_domain *domain,
2625 unsigned long nrpages, uint64_t dma_mask)
ba395927 2626{
ba395927 2627 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2628 struct iova *iova = NULL;
ba395927 2629
875764de
DW
2630 /* Restrict dma_mask to the width that the iommu can handle */
2631 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2632
2633 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2634 /*
2635 * First try to allocate an io virtual address in
284901a9 2636 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2637 * from higher range
ba395927 2638 */
875764de
DW
2639 iova = alloc_iova(&domain->iovad, nrpages,
2640 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2641 if (iova)
2642 return iova;
2643 }
2644 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2645 if (unlikely(!iova)) {
2646 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2647 nrpages, pci_name(pdev));
f76aec76
KA
2648 return NULL;
2649 }
2650
2651 return iova;
2652}
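/*
 * Usage sketch (illustrative): __intel_map_single() and intel_map_sg()
 * below convert a byte size to MM pages via aligned_nrpages() and
 * dma_to_mm_pfn() and pass the device's dma_mask, so (unless
 * dmar_forcedac is set) a device limited to 32-bit DMA first has its
 * iova carved out below the 4GiB mark.
 */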
2653
147202aa 2654static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2655{
2656 struct dmar_domain *domain;
2657 int ret;
2658
2659 domain = get_domain_for_dev(pdev,
2660 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2661 if (!domain) {
2662 printk(KERN_ERR
2663 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2664 return NULL;
ba395927
KA
2665 }
2666
2667 /* make sure context mapping is ok */
5331fe6f 2668 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2669 ret = domain_context_mapping(domain, pdev,
2670 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2671 if (ret) {
2672 printk(KERN_ERR
2673 "Domain context map for %s failed",
2674 pci_name(pdev));
4fe05bbc 2675 return NULL;
f76aec76 2676 }
ba395927
KA
2677 }
2678
f76aec76
KA
2679 return domain;
2680}
2681
147202aa
DW
2682static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2683{
2684 struct device_domain_info *info;
2685
2686 /* No lock here, assumes no domain exit in normal case */
2687 info = dev->dev.archdata.iommu;
2688 if (likely(info))
2689 return info->domain;
2690
2691 return __get_valid_domain_for_dev(dev);
2692}
2693
2c2e2c38
FY
2694static int iommu_dummy(struct pci_dev *pdev)
2695{
2696 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2697}
2698
2699/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2700static int iommu_no_mapping(struct device *dev)
2c2e2c38 2701{
73676832 2702 struct pci_dev *pdev;
2c2e2c38
FY
2703 int found;
2704
73676832
DW
2705 if (unlikely(dev->bus != &pci_bus_type))
2706 return 1;
2707
2708 pdev = to_pci_dev(dev);
1e4c64c4
DW
2709 if (iommu_dummy(pdev))
2710 return 1;
2711
2c2e2c38 2712 if (!iommu_identity_mapping)
1e4c64c4 2713 return 0;
2c2e2c38
FY
2714
2715 found = identity_mapping(pdev);
2716 if (found) {
6941af28 2717 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2718 return 1;
2719 else {
2720 /*
2721 * 32-bit DMA devices are removed from si_domain and fall back
2722 * to non-identity mapping.
2723 */
2724 domain_remove_one_dev_info(si_domain, pdev);
2725 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2726 pci_name(pdev));
2727 return 0;
2728 }
2729 } else {
2730 /*
2731 * If a 64-bit DMA device is detached from a VM, the device
2732 * is put into si_domain for identity mapping.
2733 */
6941af28 2734 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2735 int ret;
5fe60f4e
DW
2736 ret = domain_add_dev_info(si_domain, pdev,
2737 hw_pass_through ?
2738 CONTEXT_TT_PASS_THROUGH :
2739 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2740 if (!ret) {
2741 printk(KERN_INFO "64bit %s uses identity mapping\n",
2742 pci_name(pdev));
2743 return 1;
2744 }
2745 }
2746 }
2747
1e4c64c4 2748 return 0;
2c2e2c38
FY
2749}
2750
bb9e6d65
FT
2751static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2752 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2753{
2754 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2755 struct dmar_domain *domain;
5b6985ce 2756 phys_addr_t start_paddr;
f76aec76
KA
2757 struct iova *iova;
2758 int prot = 0;
6865f0d1 2759 int ret;
8c11e798 2760 struct intel_iommu *iommu;
33041ec0 2761 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2762
2763 BUG_ON(dir == DMA_NONE);
2c2e2c38 2764
73676832 2765 if (iommu_no_mapping(hwdev))
6865f0d1 2766 return paddr;
f76aec76
KA
2767
2768 domain = get_valid_domain_for_dev(pdev);
2769 if (!domain)
2770 return 0;
2771
8c11e798 2772 iommu = domain_get_iommu(domain);
88cb6a74 2773 size = aligned_nrpages(paddr, size);
f76aec76 2774
c681d0ba 2775 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
2776 if (!iova)
2777 goto error;
2778
ba395927
KA
2779 /*
2780 * Check if DMAR supports zero-length reads on write only
2781 * mappings..
2782 */
2783 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2784 !cap_zlr(iommu->cap))
ba395927
KA
2785 prot |= DMA_PTE_READ;
2786 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2787 prot |= DMA_PTE_WRITE;
2788 /*
6865f0d1 2789 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2790 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2791 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2792 * is not a big problem
2793 */
0ab36de2 2794 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2795 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2796 if (ret)
2797 goto error;
2798
1f0ef2aa
DW
2799 /* it's a non-present to present mapping. Only flush if caching mode */
2800 if (cap_caching_mode(iommu->cap))
82653633 2801 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2802 else
8c11e798 2803 iommu_flush_write_buffer(iommu);
f76aec76 2804
03d6a246
DW
2805 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2806 start_paddr += paddr & ~PAGE_MASK;
2807 return start_paddr;
ba395927 2808
ba395927 2809error:
f76aec76
KA
2810 if (iova)
2811 __free_iova(&domain->iovad, iova);
4cf2e75d 2812 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2813 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2814 return 0;
2815}
2816
ffbbef5c
FT
2817static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2818 unsigned long offset, size_t size,
2819 enum dma_data_direction dir,
2820 struct dma_attrs *attrs)
bb9e6d65 2821{
ffbbef5c
FT
2822 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2823 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2824}
2825
5e0d2a6f 2826static void flush_unmaps(void)
2827{
80b20dd8 2828 int i, j;
5e0d2a6f 2829
5e0d2a6f 2830 timer_on = 0;
2831
2832 /* just flush them all */
2833 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2834 struct intel_iommu *iommu = g_iommus[i];
2835 if (!iommu)
2836 continue;
c42d9f32 2837
9dd2fe89
YZ
2838 if (!deferred_flush[i].next)
2839 continue;
2840
78d5f0f5
NA
2841 /* In caching mode, global flushes make emulation expensive */
2842 if (!cap_caching_mode(iommu->cap))
2843 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2844 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2845 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2846 unsigned long mask;
2847 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2848 struct dmar_domain *domain = deferred_flush[i].domain[j];
2849
2850 /* On real hardware multiple invalidations are expensive */
2851 if (cap_caching_mode(iommu->cap))
2852 iommu_flush_iotlb_psi(iommu, domain->id,
2853 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2854 else {
2855 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2856 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2857 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2858 }
93a23a72 2859 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2860 }
9dd2fe89 2861 deferred_flush[i].next = 0;
5e0d2a6f 2862 }
2863
5e0d2a6f 2864 list_size = 0;
5e0d2a6f 2865}
2866
2867static void flush_unmaps_timeout(unsigned long data)
2868{
80b20dd8 2869 unsigned long flags;
2870
2871 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2872 flush_unmaps();
80b20dd8 2873 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2874}
2875
2876static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2877{
2878 unsigned long flags;
80b20dd8 2879 int next, iommu_id;
8c11e798 2880 struct intel_iommu *iommu;
5e0d2a6f 2881
2882 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2883 if (list_size == HIGH_WATER_MARK)
2884 flush_unmaps();
2885
8c11e798
WH
2886 iommu = domain_get_iommu(dom);
2887 iommu_id = iommu->seq_id;
c42d9f32 2888
80b20dd8 2889 next = deferred_flush[iommu_id].next;
2890 deferred_flush[iommu_id].domain[next] = dom;
2891 deferred_flush[iommu_id].iova[next] = iova;
2892 deferred_flush[iommu_id].next++;
5e0d2a6f 2893
2894 if (!timer_on) {
2895 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2896 timer_on = 1;
2897 }
2898 list_size++;
2899 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2900}
2901
ffbbef5c
FT
2902static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2903 size_t size, enum dma_data_direction dir,
2904 struct dma_attrs *attrs)
ba395927 2905{
ba395927 2906 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2907 struct dmar_domain *domain;
d794dc9b 2908 unsigned long start_pfn, last_pfn;
ba395927 2909 struct iova *iova;
8c11e798 2910 struct intel_iommu *iommu;
ba395927 2911
73676832 2912 if (iommu_no_mapping(dev))
f76aec76 2913 return;
2c2e2c38 2914
ba395927
KA
2915 domain = find_domain(pdev);
2916 BUG_ON(!domain);
2917
8c11e798
WH
2918 iommu = domain_get_iommu(domain);
2919
ba395927 2920 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2921 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2922 (unsigned long long)dev_addr))
ba395927 2923 return;
ba395927 2924
d794dc9b
DW
2925 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2926 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2927
d794dc9b
DW
2928 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2929 pci_name(pdev), start_pfn, last_pfn);
ba395927 2930
f76aec76 2931 /* clear the whole page */
d794dc9b
DW
2932 dma_pte_clear_range(domain, start_pfn, last_pfn);
2933
f76aec76 2934 /* free page tables */
d794dc9b
DW
2935 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2936
5e0d2a6f 2937 if (intel_iommu_strict) {
03d6a246 2938 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2939 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2940 /* free iova */
2941 __free_iova(&domain->iovad, iova);
2942 } else {
2943 add_unmap(domain, iova);
2944 /*
2945 * queue up the release of the unmap to save the 1/6th of the
2946 * cpu used up by the iotlb flush operation...
2947 */
5e0d2a6f 2948 }
ba395927
KA
2949}
2950
d7ab5c46
FT
2951static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2952 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2953{
2954 void *vaddr;
2955 int order;
2956
5b6985ce 2957 size = PAGE_ALIGN(size);
ba395927 2958 order = get_order(size);
e8bb910d
AW
2959
2960 if (!iommu_no_mapping(hwdev))
2961 flags &= ~(GFP_DMA | GFP_DMA32);
2962 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2963 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2964 flags |= GFP_DMA;
2965 else
2966 flags |= GFP_DMA32;
2967 }
ba395927
KA
2968
2969 vaddr = (void *)__get_free_pages(flags, order);
2970 if (!vaddr)
2971 return NULL;
2972 memset(vaddr, 0, size);
2973
bb9e6d65
FT
2974 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2975 DMA_BIDIRECTIONAL,
2976 hwdev->coherent_dma_mask);
ba395927
KA
2977 if (*dma_handle)
2978 return vaddr;
2979 free_pages((unsigned long)vaddr, order);
2980 return NULL;
2981}
2982
d7ab5c46
FT
2983static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2984 dma_addr_t dma_handle)
ba395927
KA
2985{
2986 int order;
2987
5b6985ce 2988 size = PAGE_ALIGN(size);
ba395927
KA
2989 order = get_order(size);
2990
0db9b7ae 2991 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2992 free_pages((unsigned long)vaddr, order);
2993}
2994
d7ab5c46
FT
2995static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2996 int nelems, enum dma_data_direction dir,
2997 struct dma_attrs *attrs)
ba395927 2998{
ba395927
KA
2999 struct pci_dev *pdev = to_pci_dev(hwdev);
3000 struct dmar_domain *domain;
d794dc9b 3001 unsigned long start_pfn, last_pfn;
f76aec76 3002 struct iova *iova;
8c11e798 3003 struct intel_iommu *iommu;
ba395927 3004
73676832 3005 if (iommu_no_mapping(hwdev))
ba395927
KA
3006 return;
3007
3008 domain = find_domain(pdev);
8c11e798
WH
3009 BUG_ON(!domain);
3010
3011 iommu = domain_get_iommu(domain);
ba395927 3012
c03ab37c 3013 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3014 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3015 (unsigned long long)sglist[0].dma_address))
f76aec76 3016 return;
f76aec76 3017
d794dc9b
DW
3018 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3019 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
3020
3021 /* clear the whole page */
d794dc9b
DW
3022 dma_pte_clear_range(domain, start_pfn, last_pfn);
3023
f76aec76 3024 /* free page tables */
d794dc9b 3025 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 3026
acea0018
DW
3027 if (intel_iommu_strict) {
3028 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 3029 last_pfn - start_pfn + 1, 0);
acea0018
DW
3030 /* free iova */
3031 __free_iova(&domain->iovad, iova);
3032 } else {
3033 add_unmap(domain, iova);
3034 /*
3035 * queue up the release of the unmap to save the 1/6th of the
3036 * cpu used up by the iotlb flush operation...
3037 */
3038 }
ba395927
KA
3039}
3040
ba395927 3041static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3042 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3043{
3044 int i;
c03ab37c 3045 struct scatterlist *sg;
ba395927 3046
c03ab37c 3047 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3048 BUG_ON(!sg_page(sg));
4cf2e75d 3049 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3050 sg->dma_length = sg->length;
ba395927
KA
3051 }
3052 return nelems;
3053}
3054
d7ab5c46
FT
3055static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3056 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3057{
ba395927 3058 int i;
ba395927
KA
3059 struct pci_dev *pdev = to_pci_dev(hwdev);
3060 struct dmar_domain *domain;
f76aec76
KA
3061 size_t size = 0;
3062 int prot = 0;
f76aec76
KA
3063 struct iova *iova = NULL;
3064 int ret;
c03ab37c 3065 struct scatterlist *sg;
b536d24d 3066 unsigned long start_vpfn;
8c11e798 3067 struct intel_iommu *iommu;
ba395927
KA
3068
3069 BUG_ON(dir == DMA_NONE);
73676832 3070 if (iommu_no_mapping(hwdev))
c03ab37c 3071 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 3072
f76aec76
KA
3073 domain = get_valid_domain_for_dev(pdev);
3074 if (!domain)
3075 return 0;
3076
8c11e798
WH
3077 iommu = domain_get_iommu(domain);
3078
b536d24d 3079 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3080 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3081
5a5e02a6
DW
3082 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3083 pdev->dma_mask);
f76aec76 3084 if (!iova) {
c03ab37c 3085 sglist->dma_length = 0;
f76aec76
KA
3086 return 0;
3087 }
3088
3089 /*
3090 * Check if DMAR supports zero-length reads on write only
3091 * mappings..
3092 */
3093 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3094 !cap_zlr(iommu->cap))
f76aec76
KA
3095 prot |= DMA_PTE_READ;
3096 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3097 prot |= DMA_PTE_WRITE;
3098
b536d24d 3099 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3100
f532959b 3101 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3102 if (unlikely(ret)) {
3103 /* clear the page */
3104 dma_pte_clear_range(domain, start_vpfn,
3105 start_vpfn + size - 1);
3106 /* free page tables */
3107 dma_pte_free_pagetable(domain, start_vpfn,
3108 start_vpfn + size - 1);
3109 /* free iova */
3110 __free_iova(&domain->iovad, iova);
3111 return 0;
ba395927
KA
3112 }
3113
1f0ef2aa
DW
3114 /* it's a non-present to present mapping. Only flush if caching mode */
3115 if (cap_caching_mode(iommu->cap))
82653633 3116 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 3117 else
8c11e798 3118 iommu_flush_write_buffer(iommu);
1f0ef2aa 3119
ba395927
KA
3120 return nelems;
3121}
3122
dfb805e8
FT
3123static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3124{
3125 return !dma_addr;
3126}
3127
160c1d8e 3128struct dma_map_ops intel_dma_ops = {
ba395927
KA
3129 .alloc_coherent = intel_alloc_coherent,
3130 .free_coherent = intel_free_coherent,
ba395927
KA
3131 .map_sg = intel_map_sg,
3132 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3133 .map_page = intel_map_page,
3134 .unmap_page = intel_unmap_page,
dfb805e8 3135 .mapping_error = intel_mapping_error,
ba395927
KA
3136};
3137
3138static inline int iommu_domain_cache_init(void)
3139{
3140 int ret = 0;
3141
3142 iommu_domain_cache = kmem_cache_create("iommu_domain",
3143 sizeof(struct dmar_domain),
3144 0,
3145 SLAB_HWCACHE_ALIGN,
3146
3147 NULL);
3148 if (!iommu_domain_cache) {
3149 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3150 ret = -ENOMEM;
3151 }
3152
3153 return ret;
3154}
3155
3156static inline int iommu_devinfo_cache_init(void)
3157{
3158 int ret = 0;
3159
3160 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3161 sizeof(struct device_domain_info),
3162 0,
3163 SLAB_HWCACHE_ALIGN,
ba395927
KA
3164 NULL);
3165 if (!iommu_devinfo_cache) {
3166 printk(KERN_ERR "Couldn't create devinfo cache\n");
3167 ret = -ENOMEM;
3168 }
3169
3170 return ret;
3171}
3172
3173static inline int iommu_iova_cache_init(void)
3174{
3175 int ret = 0;
3176
3177 iommu_iova_cache = kmem_cache_create("iommu_iova",
3178 sizeof(struct iova),
3179 0,
3180 SLAB_HWCACHE_ALIGN,
ba395927
KA
3181 NULL);
3182 if (!iommu_iova_cache) {
3183 printk(KERN_ERR "Couldn't create iova cache\n");
3184 ret = -ENOMEM;
3185 }
3186
3187 return ret;
3188}
3189
3190static int __init iommu_init_mempool(void)
3191{
3192 int ret;
3193 ret = iommu_iova_cache_init();
3194 if (ret)
3195 return ret;
3196
3197 ret = iommu_domain_cache_init();
3198 if (ret)
3199 goto domain_error;
3200
3201 ret = iommu_devinfo_cache_init();
3202 if (!ret)
3203 return ret;
3204
3205 kmem_cache_destroy(iommu_domain_cache);
3206domain_error:
3207 kmem_cache_destroy(iommu_iova_cache);
3208
3209 return -ENOMEM;
3210}
3211
3212static void __init iommu_exit_mempool(void)
3213{
3214 kmem_cache_destroy(iommu_devinfo_cache);
3215 kmem_cache_destroy(iommu_domain_cache);
3216 kmem_cache_destroy(iommu_iova_cache);
3217
3218}
3219
556ab45f
DW
3220static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3221{
3222 struct dmar_drhd_unit *drhd;
3223 u32 vtbar;
3224 int rc;
3225
3226 /* We know that this device on this chipset has its own IOMMU.
3227 * If we find it under a different IOMMU, then the BIOS is lying
3228 * to us. Hope that the IOMMU for this device is actually
3229 * disabled, and it needs no translation...
3230 */
3231 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3232 if (rc) {
3233 /* "can't" happen */
3234 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3235 return;
3236 }
3237 vtbar &= 0xffff0000;
3238
3239 /* we know that this iommu should be at offset 0xa000 from vtbar */
3240 drhd = dmar_find_matched_drhd_unit(pdev);
3241 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3242 TAINT_FIRMWARE_WORKAROUND,
3243 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3244 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3245}
3246DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3247
ba395927
KA
3248static void __init init_no_remapping_devices(void)
3249{
3250 struct dmar_drhd_unit *drhd;
3251
3252 for_each_drhd_unit(drhd) {
3253 if (!drhd->include_all) {
3254 int i;
3255 for (i = 0; i < drhd->devices_cnt; i++)
3256 if (drhd->devices[i] != NULL)
3257 break;
3258 /* ignore DMAR unit if no pci devices exist */
3259 if (i == drhd->devices_cnt)
3260 drhd->ignored = 1;
3261 }
3262 }
3263
ba395927
KA
3264 for_each_drhd_unit(drhd) {
3265 int i;
3266 if (drhd->ignored || drhd->include_all)
3267 continue;
3268
3269 for (i = 0; i < drhd->devices_cnt; i++)
3270 if (drhd->devices[i] &&
c0771df8 3271 !IS_GFX_DEVICE(drhd->devices[i]))
ba395927
KA
3272 break;
3273
3274 if (i < drhd->devices_cnt)
3275 continue;
3276
c0771df8
DW
3277 /* This IOMMU has *only* gfx devices. Either bypass it or
3278 set the gfx_mapped flag, as appropriate */
3279 if (dmar_map_gfx) {
3280 intel_iommu_gfx_mapped = 1;
3281 } else {
3282 drhd->ignored = 1;
3283 for (i = 0; i < drhd->devices_cnt; i++) {
3284 if (!drhd->devices[i])
3285 continue;
3286 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3287 }
ba395927
KA
3288 }
3289 }
3290}
3291
f59c7b69
FY
3292#ifdef CONFIG_SUSPEND
3293static int init_iommu_hw(void)
3294{
3295 struct dmar_drhd_unit *drhd;
3296 struct intel_iommu *iommu = NULL;
3297
3298 for_each_active_iommu(iommu, drhd)
3299 if (iommu->qi)
3300 dmar_reenable_qi(iommu);
3301
b779260b
JC
3302 for_each_iommu(iommu, drhd) {
3303 if (drhd->ignored) {
3304 /*
3305 * we always have to disable PMRs or DMA may fail on
3306 * this device
3307 */
3308 if (force_on)
3309 iommu_disable_protect_mem_regions(iommu);
3310 continue;
3311 }
3312
f59c7b69
FY
3313 iommu_flush_write_buffer(iommu);
3314
3315 iommu_set_root_entry(iommu);
3316
3317 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3318 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3319 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3320 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3321 if (iommu_enable_translation(iommu))
3322 return 1;
b94996c9 3323 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3324 }
3325
3326 return 0;
3327}
3328
3329static void iommu_flush_all(void)
3330{
3331 struct dmar_drhd_unit *drhd;
3332 struct intel_iommu *iommu;
3333
3334 for_each_active_iommu(iommu, drhd) {
3335 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3336 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3337 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3338 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3339 }
3340}
3341
134fac3f 3342static int iommu_suspend(void)
f59c7b69
FY
3343{
3344 struct dmar_drhd_unit *drhd;
3345 struct intel_iommu *iommu = NULL;
3346 unsigned long flag;
3347
3348 for_each_active_iommu(iommu, drhd) {
3349 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3350 GFP_ATOMIC);
3351 if (!iommu->iommu_state)
3352 goto nomem;
3353 }
3354
3355 iommu_flush_all();
3356
3357 for_each_active_iommu(iommu, drhd) {
3358 iommu_disable_translation(iommu);
3359
1f5b3c3f 3360 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3361
3362 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3363 readl(iommu->reg + DMAR_FECTL_REG);
3364 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3365 readl(iommu->reg + DMAR_FEDATA_REG);
3366 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3367 readl(iommu->reg + DMAR_FEADDR_REG);
3368 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3369 readl(iommu->reg + DMAR_FEUADDR_REG);
3370
1f5b3c3f 3371 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3372 }
3373 return 0;
3374
3375nomem:
3376 for_each_active_iommu(iommu, drhd)
3377 kfree(iommu->iommu_state);
3378
3379 return -ENOMEM;
3380}
3381
134fac3f 3382static void iommu_resume(void)
f59c7b69
FY
3383{
3384 struct dmar_drhd_unit *drhd;
3385 struct intel_iommu *iommu = NULL;
3386 unsigned long flag;
3387
3388 if (init_iommu_hw()) {
b779260b
JC
3389 if (force_on)
3390 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3391 else
3392 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3393 return;
f59c7b69
FY
3394 }
3395
3396 for_each_active_iommu(iommu, drhd) {
3397
1f5b3c3f 3398 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3399
3400 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3401 iommu->reg + DMAR_FECTL_REG);
3402 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3403 iommu->reg + DMAR_FEDATA_REG);
3404 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3405 iommu->reg + DMAR_FEADDR_REG);
3406 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3407 iommu->reg + DMAR_FEUADDR_REG);
3408
1f5b3c3f 3409 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3410 }
3411
3412 for_each_active_iommu(iommu, drhd)
3413 kfree(iommu->iommu_state);
f59c7b69
FY
3414}
3415
134fac3f 3416static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3417 .resume = iommu_resume,
3418 .suspend = iommu_suspend,
3419};
3420
134fac3f 3421static void __init init_iommu_pm_ops(void)
f59c7b69 3422{
134fac3f 3423 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3424}
3425
3426#else
99592ba4 3427static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3428#endif /* CONFIG_PM */
3429
318fe7df
SS
3430LIST_HEAD(dmar_rmrr_units);
3431
3432static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3433{
3434 list_add(&rmrr->list, &dmar_rmrr_units);
3435}
3436
3437
3438int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3439{
3440 struct acpi_dmar_reserved_memory *rmrr;
3441 struct dmar_rmrr_unit *rmrru;
3442
3443 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3444 if (!rmrru)
3445 return -ENOMEM;
3446
3447 rmrru->hdr = header;
3448 rmrr = (struct acpi_dmar_reserved_memory *)header;
3449 rmrru->base_address = rmrr->base_address;
3450 rmrru->end_address = rmrr->end_address;
3451
3452 dmar_register_rmrr_unit(rmrru);
3453 return 0;
3454}
3455
3456static int __init
3457rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3458{
3459 struct acpi_dmar_reserved_memory *rmrr;
3460 int ret;
3461
3462 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3463 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3464 ((void *)rmrr) + rmrr->header.length,
3465 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3466
3467 if (ret || (rmrru->devices_cnt == 0)) {
3468 list_del(&rmrru->list);
3469 kfree(rmrru);
3470 }
3471 return ret;
3472}
3473
3474static LIST_HEAD(dmar_atsr_units);
3475
3476int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3477{
3478 struct acpi_dmar_atsr *atsr;
3479 struct dmar_atsr_unit *atsru;
3480
3481 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3482 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3483 if (!atsru)
3484 return -ENOMEM;
3485
3486 atsru->hdr = hdr;
3487 atsru->include_all = atsr->flags & 0x1;
3488
3489 list_add(&atsru->list, &dmar_atsr_units);
3490
3491 return 0;
3492}
3493
3494static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3495{
3496 int rc;
3497 struct acpi_dmar_atsr *atsr;
3498
3499 if (atsru->include_all)
3500 return 0;
3501
3502 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3503 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3504 (void *)atsr + atsr->header.length,
3505 &atsru->devices_cnt, &atsru->devices,
3506 atsr->segment);
3507 if (rc || !atsru->devices_cnt) {
3508 list_del(&atsru->list);
3509 kfree(atsru);
3510 }
3511
3512 return rc;
3513}
3514
3515int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3516{
3517 int i;
3518 struct pci_bus *bus;
3519 struct acpi_dmar_atsr *atsr;
3520 struct dmar_atsr_unit *atsru;
3521
3522 dev = pci_physfn(dev);
3523
3524 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3525 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3526 if (atsr->segment == pci_domain_nr(dev->bus))
3527 goto found;
3528 }
3529
3530 return 0;
3531
3532found:
3533 for (bus = dev->bus; bus; bus = bus->parent) {
3534 struct pci_dev *bridge = bus->self;
3535
3536 if (!bridge || !pci_is_pcie(bridge) ||
3537 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
3538 return 0;
3539
3540 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
3541 for (i = 0; i < atsru->devices_cnt; i++)
3542 if (atsru->devices[i] == bridge)
3543 return 1;
3544 break;
3545 }
3546 }
3547
3548 if (atsru->include_all)
3549 return 1;
3550
3551 return 0;
3552}
3553
c8f369ab 3554int __init dmar_parse_rmrr_atsr_dev(void)
318fe7df
SS
3555{
3556 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3557 struct dmar_atsr_unit *atsr, *atsr_n;
3558 int ret = 0;
3559
3560 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3561 ret = rmrr_parse_dev(rmrr);
3562 if (ret)
3563 return ret;
3564 }
3565
3566 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3567 ret = atsr_parse_dev(atsr);
3568 if (ret)
3569 return ret;
3570 }
3571
3572 return ret;
3573}
3574
99dcaded
FY
3575/*
3576 * Here we only respond to the action of a device being unbound from its driver.
3577 *
3578 * A newly added device is not attached to its DMAR domain here yet. That will
3579 * happen when the device is mapped to an iova.
3580 */
3581static int device_notifier(struct notifier_block *nb,
3582 unsigned long action, void *data)
3583{
3584 struct device *dev = data;
3585 struct pci_dev *pdev = to_pci_dev(dev);
3586 struct dmar_domain *domain;
3587
44cd613c
DW
3588 if (iommu_no_mapping(dev))
3589 return 0;
3590
99dcaded
FY
3591 domain = find_domain(pdev);
3592 if (!domain)
3593 return 0;
3594
a97590e5 3595 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
99dcaded
FY
3596 domain_remove_one_dev_info(domain, pdev);
3597
a97590e5
AW
3598 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3599 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3600 list_empty(&domain->devices))
3601 domain_exit(domain);
3602 }
3603
99dcaded
FY
3604 return 0;
3605}
3606
3607static struct notifier_block device_nb = {
3608 .notifier_call = device_notifier,
3609};
3610
ba395927
KA
3611int __init intel_iommu_init(void)
3612{
3613 int ret = 0;
3614
a59b50e9
JC
3615 /* VT-d is required for a TXT/tboot launch, so enforce that */
3616 force_on = tboot_force_iommu();
3617
3618 if (dmar_table_init()) {
3619 if (force_on)
3620 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3621 return -ENODEV;
a59b50e9 3622 }
ba395927 3623
c2c7286a 3624 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3625 if (force_on)
3626 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3627 return -ENODEV;
a59b50e9 3628 }
1886e8a9 3629
75f1cdf1 3630 if (no_iommu || dmar_disabled)
2ae21010
SS
3631 return -ENODEV;
3632
51a63e67
JC
3633 if (iommu_init_mempool()) {
3634 if (force_on)
3635 panic("tboot: Failed to initialize iommu memory\n");
3636 return -ENODEV;
3637 }
3638
318fe7df
SS
3639 if (list_empty(&dmar_rmrr_units))
3640 printk(KERN_INFO "DMAR: No RMRR found\n");
3641
3642 if (list_empty(&dmar_atsr_units))
3643 printk(KERN_INFO "DMAR: No ATSR found\n");
3644
51a63e67
JC
3645 if (dmar_init_reserved_ranges()) {
3646 if (force_on)
3647 panic("tboot: Failed to reserve iommu ranges\n");
3648 return -ENODEV;
3649 }
ba395927
KA
3650
3651 init_no_remapping_devices();
3652
b779260b 3653 ret = init_dmars();
ba395927 3654 if (ret) {
a59b50e9
JC
3655 if (force_on)
3656 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3657 printk(KERN_ERR "IOMMU: dmar init failed\n");
3658 put_iova_domain(&reserved_iova_list);
3659 iommu_exit_mempool();
3660 return ret;
3661 }
3662 printk(KERN_INFO
3663 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3664
5e0d2a6f 3665 init_timer(&unmap_timer);
75f1cdf1
FT
3666#ifdef CONFIG_SWIOTLB
3667 swiotlb = 0;
3668#endif
19943b0e 3669 dma_ops = &intel_dma_ops;
4ed0d3e6 3670
134fac3f 3671 init_iommu_pm_ops();
a8bcbb0d 3672
4236d97d 3673 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
a8bcbb0d 3674
99dcaded
FY
3675 bus_register_notifier(&pci_bus_type, &device_nb);
3676
8bc1f85c
ED
3677 intel_iommu_enabled = 1;
3678
ba395927
KA
3679 return 0;
3680}
e820482c 3681
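/*
 * A device behind a PCIe-to-PCI(-X) bridge also has context entries set up
 * for the bridge(s) above it; tear those down as well when the device is
 * detached.
 */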
3199aa6b
HW
3682static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3683 struct pci_dev *pdev)
3684{
3685 struct pci_dev *tmp, *parent;
3686
3687 if (!iommu || !pdev)
3688 return;
3689
3690 /* dependent device detach */
3691 tmp = pci_find_upstream_pcie_bridge(pdev);
3692 /* Secondary interface's bus number and devfn 0 */
3693 if (tmp) {
3694 parent = pdev->bus->self;
3695 while (parent != tmp) {
3696 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3697 parent->devfn);
3199aa6b
HW
3698 parent = parent->bus->self;
3699 }
45e829ea 3700 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3701 iommu_detach_dev(iommu,
3702 tmp->subordinate->number, 0);
3703 else /* this is a legacy PCI bridge */
276dbf99
DW
3704 iommu_detach_dev(iommu, tmp->bus->number,
3705 tmp->devfn);
3199aa6b
HW
3706 }
3707}
3708
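/*
 * Detach one device from @domain: unlink its device_domain_info, clear its
 * context entry and dev-IOTLB, and detach any bridges it sits behind.  If no
 * other device in the domain is behind the same iommu, the iommu is dropped
 * from the domain's bitmap, and for non-VM, non-identity domains the domain
 * id is released on that iommu.
 */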
2c2e2c38 3709static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3710 struct pci_dev *pdev)
3711{
3712 struct device_domain_info *info;
3713 struct intel_iommu *iommu;
3714 unsigned long flags;
3715 int found = 0;
3716 struct list_head *entry, *tmp;
3717
276dbf99
DW
3718 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3719 pdev->devfn);
c7151a8d
WH
3720 if (!iommu)
3721 return;
3722
3723 spin_lock_irqsave(&device_domain_lock, flags);
3724 list_for_each_safe(entry, tmp, &domain->devices) {
3725 info = list_entry(entry, struct device_domain_info, link);
8519dc44
MH
3726 if (info->segment == pci_domain_nr(pdev->bus) &&
3727 info->bus == pdev->bus->number &&
c7151a8d
WH
3728 info->devfn == pdev->devfn) {
3729 list_del(&info->link);
3730 list_del(&info->global);
3731 if (info->dev)
3732 info->dev->dev.archdata.iommu = NULL;
3733 spin_unlock_irqrestore(&device_domain_lock, flags);
3734
93a23a72 3735 iommu_disable_dev_iotlb(info);
c7151a8d 3736 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3737 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3738 free_devinfo_mem(info);
3739
3740 spin_lock_irqsave(&device_domain_lock, flags);
3741
3742 if (found)
3743 break;
3744 else
3745 continue;
3746 }
3747
3748 /* if there are no other devices owned by this domain under
3749 * the same iommu, clear this iommu from iommu_bmp and update
3750 * the iommu count and capabilities
3751 */
276dbf99
DW
3752 if (iommu == device_to_iommu(info->segment, info->bus,
3753 info->devfn))
c7151a8d
WH
3754 found = 1;
3755 }
3756
3e7abe25
RD
3757 spin_unlock_irqrestore(&device_domain_lock, flags);
3758
c7151a8d
WH
3759 if (found == 0) {
3760 unsigned long tmp_flags;
3761 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 3762 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 3763 domain->iommu_count--;
58c610bd 3764 domain_update_iommu_cap(domain);
c7151a8d 3765 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 3766
9b4554b2
AW
3767 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3768 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3769 spin_lock_irqsave(&iommu->lock, tmp_flags);
3770 clear_bit(domain->id, iommu->domain_ids);
3771 iommu->domains[domain->id] = NULL;
3772 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3773 }
c7151a8d 3774 }
c7151a8d
WH
3775}
3776
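/*
 * Detach every device from a VM domain, clearing context entries and
 * dev-IOTLBs and updating the per-iommu bookkeeping as each one goes.
 */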
3777static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3778{
3779 struct device_domain_info *info;
3780 struct intel_iommu *iommu;
3781 unsigned long flags1, flags2;
3782
3783 spin_lock_irqsave(&device_domain_lock, flags1);
3784 while (!list_empty(&domain->devices)) {
3785 info = list_entry(domain->devices.next,
3786 struct device_domain_info, link);
3787 list_del(&info->link);
3788 list_del(&info->global);
3789 if (info->dev)
3790 info->dev->dev.archdata.iommu = NULL;
3791
3792 spin_unlock_irqrestore(&device_domain_lock, flags1);
3793
93a23a72 3794 iommu_disable_dev_iotlb(info);
276dbf99 3795 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3796 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3797 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3798
3799 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3800 * and capabilities
c7151a8d
WH
3801 */
3802 spin_lock_irqsave(&domain->iommu_lock, flags2);
3803 if (test_and_clear_bit(iommu->seq_id,
1b198bb0 3804 domain->iommu_bmp)) {
c7151a8d 3805 domain->iommu_count--;
58c610bd 3806 domain_update_iommu_cap(domain);
c7151a8d
WH
3807 }
3808 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3809
3810 free_devinfo_mem(info);
3811 spin_lock_irqsave(&device_domain_lock, flags1);
3812 }
3813 spin_unlock_irqrestore(&device_domain_lock, flags1);
3814}
3815
5e98c4b1
WH
3816/* domain id for virtual machine, it won't be set in context */
3817static unsigned long vm_domid;
3818
3819static struct dmar_domain *iommu_alloc_vm_domain(void)
3820{
3821 struct dmar_domain *domain;
3822
3823 domain = alloc_domain_mem();
3824 if (!domain)
3825 return NULL;
3826
3827 domain->id = vm_domid++;
4c923d47 3828 domain->nid = -1;
1b198bb0 3829 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
5e98c4b1
WH
3830 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3831
3832 return domain;
3833}
3834
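/*
 * Minimal domain initialisation used for IOMMU-API (VM) domains: set up the
 * iova allocator and reserved ranges, derive the AGAW from the requested
 * guest width, and allocate the top-level page directory.
 */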
2c2e2c38 3835static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3836{
3837 int adjust_width;
3838
3839 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3840 spin_lock_init(&domain->iommu_lock);
3841
3842 domain_reserve_special_ranges(domain);
3843
3844 /* calculate AGAW */
3845 domain->gaw = guest_width;
3846 adjust_width = guestwidth_to_adjustwidth(guest_width);
3847 domain->agaw = width_to_agaw(adjust_width);
3848
3849 INIT_LIST_HEAD(&domain->devices);
3850
3851 domain->iommu_count = 0;
3852 domain->iommu_coherency = 0;
c5b15255 3853 domain->iommu_snooping = 0;
6dd9a7c7 3854 domain->iommu_superpage = 0;
fe40f1e0 3855 domain->max_addr = 0;
4c923d47 3856 domain->nid = -1;
5e98c4b1
WH
3857
3858 /* always allocate the top pgd */
4c923d47 3859 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3860 if (!domain->pgd)
3861 return -ENOMEM;
3862 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3863 return 0;
3864}
3865
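/*
 * Release the domain id this VM domain occupies on every hardware unit it
 * was ever attached to.
 */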
3866static void iommu_free_vm_domain(struct dmar_domain *domain)
3867{
3868 unsigned long flags;
3869 struct dmar_drhd_unit *drhd;
3870 struct intel_iommu *iommu;
3871 unsigned long i;
3872 unsigned long ndomains;
3873
3874 for_each_drhd_unit(drhd) {
3875 if (drhd->ignored)
3876 continue;
3877 iommu = drhd->iommu;
3878
3879 ndomains = cap_ndoms(iommu->cap);
a45946ab 3880 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3881 if (iommu->domains[i] == domain) {
3882 spin_lock_irqsave(&iommu->lock, flags);
3883 clear_bit(i, iommu->domain_ids);
3884 iommu->domains[i] = NULL;
3885 spin_unlock_irqrestore(&iommu->lock, flags);
3886 break;
3887 }
5e98c4b1
WH
3888 }
3889 }
3890}
3891
3892static void vm_domain_exit(struct dmar_domain *domain)
3893{
5e98c4b1
WH
3894 /* Domain 0 is reserved, so don't process it */
3895 if (!domain)
3896 return;
3897
3898 vm_domain_remove_all_dev_info(domain);
3899 /* destroy iovas */
3900 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3901
3902 /* clear ptes */
595badf5 3903 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3904
3905 /* free page tables */
d794dc9b 3906 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3907
3908 iommu_free_vm_domain(domain);
3909 free_domain_mem(domain);
3910}
3911
5d450806 3912static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3913{
5d450806 3914 struct dmar_domain *dmar_domain;
38717946 3915
5d450806
JR
3916 dmar_domain = iommu_alloc_vm_domain();
3917 if (!dmar_domain) {
38717946 3918 printk(KERN_ERR
5d450806
JR
3919 "intel_iommu_domain_init: dmar_domain == NULL\n");
3920 return -ENOMEM;
38717946 3921 }
2c2e2c38 3922 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3923 printk(KERN_ERR
5d450806
JR
3924 "intel_iommu_domain_init() failed\n");
3925 vm_domain_exit(dmar_domain);
3926 return -ENOMEM;
38717946 3927 }
8140a95d 3928 domain_update_iommu_cap(dmar_domain);
5d450806 3929 domain->priv = dmar_domain;
faa3d6f5 3930
5d450806 3931 return 0;
38717946 3932}
38717946 3933
5d450806 3934static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3935{
5d450806
JR
3936 struct dmar_domain *dmar_domain = domain->priv;
3937
3938 domain->priv = NULL;
3939 vm_domain_exit(dmar_domain);
38717946 3940}
38717946 3941
4c5478c9
JR
3942static int intel_iommu_attach_device(struct iommu_domain *domain,
3943 struct device *dev)
38717946 3944{
4c5478c9
JR
3945 struct dmar_domain *dmar_domain = domain->priv;
3946 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3947 struct intel_iommu *iommu;
3948 int addr_width;
faa3d6f5
WH
3949
3950 /* normally pdev is not mapped */
3951 if (unlikely(domain_context_mapped(pdev))) {
3952 struct dmar_domain *old_domain;
3953
3954 old_domain = find_domain(pdev);
3955 if (old_domain) {
2c2e2c38
FY
3956 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3957 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3958 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3959 else
3960 domain_remove_dev_info(old_domain);
3961 }
3962 }
3963
276dbf99
DW
3964 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3965 pdev->devfn);
fe40f1e0
WH
3966 if (!iommu)
3967 return -ENODEV;
3968
3969 /* check if this iommu agaw is sufficient for max mapped address */
3970 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3971 if (addr_width > cap_mgaw(iommu->cap))
3972 addr_width = cap_mgaw(iommu->cap);
3973
3974 if (dmar_domain->max_addr > (1LL << addr_width)) {
3975 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3976 "sufficient for the mapped address (%llx)\n",
a99c47a2 3977 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3978 return -EFAULT;
3979 }
a99c47a2
TL
3980 dmar_domain->gaw = addr_width;
3981
3982 /*
3983 * Knock out extra levels of page tables if necessary
3984 */
3985 while (iommu->agaw < dmar_domain->agaw) {
3986 struct dma_pte *pte;
3987
3988 pte = dmar_domain->pgd;
3989 if (dma_pte_present(pte)) {
25cbff16
SY
3990 dmar_domain->pgd = (struct dma_pte *)
3991 phys_to_virt(dma_pte_addr(pte));
7a661013 3992 free_pgtable_page(pte);
a99c47a2
TL
3993 }
3994 dmar_domain->agaw--;
3995 }
fe40f1e0 3996
5fe60f4e 3997 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3998}
38717946 3999
4c5478c9
JR
4000static void intel_iommu_detach_device(struct iommu_domain *domain,
4001 struct device *dev)
38717946 4002{
4c5478c9
JR
4003 struct dmar_domain *dmar_domain = domain->priv;
4004 struct pci_dev *pdev = to_pci_dev(dev);
4005
2c2e2c38 4006 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 4007}
c7151a8d 4008
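/*
 * IOMMU-API map: translate IOMMU_READ/WRITE/CACHE into DMA PTE bits, grow
 * the domain's max_addr (bounded by its guest address width), and install
 * the page-frame mapping.
 */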
b146a1c9
JR
4009static int intel_iommu_map(struct iommu_domain *domain,
4010 unsigned long iova, phys_addr_t hpa,
5009065d 4011 size_t size, int iommu_prot)
faa3d6f5 4012{
dde57a21 4013 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4014 u64 max_addr;
dde57a21 4015 int prot = 0;
faa3d6f5 4016 int ret;
fe40f1e0 4017
dde57a21
JR
4018 if (iommu_prot & IOMMU_READ)
4019 prot |= DMA_PTE_READ;
4020 if (iommu_prot & IOMMU_WRITE)
4021 prot |= DMA_PTE_WRITE;
9cf06697
SY
4022 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4023 prot |= DMA_PTE_SNP;
dde57a21 4024
163cc52c 4025 max_addr = iova + size;
dde57a21 4026 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4027 u64 end;
4028
4029 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4030 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4031 if (end < max_addr) {
8954da1f 4032 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4033 "sufficient for the mapped address (%llx)\n",
8954da1f 4034 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4035 return -EFAULT;
4036 }
dde57a21 4037 dmar_domain->max_addr = max_addr;
fe40f1e0 4038 }
ad051221
DW
4039 /* Round up size to next multiple of PAGE_SIZE, if it and
4040 the low bits of hpa would take us onto the next page */
88cb6a74 4041 size = aligned_nrpages(hpa, size);
ad051221
DW
4042 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4043 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4044 return ret;
38717946 4045}
38717946 4046
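/*
 * IOMMU-API unmap: clear the PTEs covering the range and report how much was
 * actually unmapped (PAGE_SIZE << order of the cleared range).
 */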
5009065d
OBC
4047static size_t intel_iommu_unmap(struct iommu_domain *domain,
4048 unsigned long iova, size_t size)
38717946 4049{
dde57a21 4050 struct dmar_domain *dmar_domain = domain->priv;
292827cb 4051 int order;
4b99d352 4052
292827cb 4053 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
163cc52c 4054 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 4055
163cc52c
DW
4056 if (dmar_domain->max_addr == iova + size)
4057 dmar_domain->max_addr = iova;
b146a1c9 4058
5009065d 4059 return PAGE_SIZE << order;
38717946 4060}
38717946 4061
d14d6577
JR
4062static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4063 unsigned long iova)
38717946 4064{
d14d6577 4065 struct dmar_domain *dmar_domain = domain->priv;
38717946 4066 struct dma_pte *pte;
faa3d6f5 4067 u64 phys = 0;
38717946 4068
6dd9a7c7 4069 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
38717946 4070 if (pte)
faa3d6f5 4071 phys = dma_pte_addr(pte);
38717946 4072
faa3d6f5 4073 return phys;
38717946 4074}
a8bcbb0d 4075
dbb9fd86
SY
4076static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4077 unsigned long cap)
4078{
4079 struct dmar_domain *dmar_domain = domain->priv;
4080
4081 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4082 return dmar_domain->iommu_snooping;
323f99cb
TL
4083 if (cap == IOMMU_CAP_INTR_REMAP)
4084 return intr_remapping_enabled;
dbb9fd86
SY
4085
4086 return 0;
4087}
4088
70ae6f0d
AW
4089/*
4090 * Group numbers are arbitrary. Devices with the same group number
4091 * indicate that the iommu cannot differentiate between them. To avoid
4092 * tracking used groups we simply use the seg|bus|devfn of the lowest
4093 * level at which we are able to differentiate devices.
4094 */
4095static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
4096{
4097 struct pci_dev *pdev = to_pci_dev(dev);
4098 struct pci_dev *bridge;
4099 union {
4100 struct {
4101 u8 devfn;
4102 u8 bus;
4103 u16 segment;
4104 } pci;
4105 u32 group;
4106 } id;
4107
4108 if (iommu_no_mapping(dev))
4109 return -ENODEV;
4110
4111 id.pci.segment = pci_domain_nr(pdev->bus);
4112 id.pci.bus = pdev->bus->number;
4113 id.pci.devfn = pdev->devfn;
4114
4115 if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
4116 return -ENODEV;
4117
4118 bridge = pci_find_upstream_pcie_bridge(pdev);
4119 if (bridge) {
4120 if (pci_is_pcie(bridge)) {
4121 id.pci.bus = bridge->subordinate->number;
4122 id.pci.devfn = 0;
4123 } else {
4124 id.pci.bus = bridge->bus->number;
4125 id.pci.devfn = bridge->devfn;
4126 }
4127 }
4128
bcb71abe
AW
4129 if (!pdev->is_virtfn && iommu_group_mf)
4130 id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
4131
70ae6f0d
AW
4132 *groupid = id.group;
4133
4134 return 0;
4135}
4136
a8bcbb0d
JR
4137static struct iommu_ops intel_iommu_ops = {
4138 .domain_init = intel_iommu_domain_init,
4139 .domain_destroy = intel_iommu_domain_destroy,
4140 .attach_dev = intel_iommu_attach_device,
4141 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4142 .map = intel_iommu_map,
4143 .unmap = intel_iommu_unmap,
a8bcbb0d 4144 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4145 .domain_has_cap = intel_iommu_domain_has_cap,
70ae6f0d 4146 .device_group = intel_iommu_device_group,
6d1c56a9 4147 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4148};
9af88143
DW
4149
4150static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4151{
4152 /*
4153 * Mobile 4 Series Chipset neglects to set RWBF capability,
4154 * but needs it:
4155 */
4156 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4157 rwbf_quirk = 1;
2d9e667e
DW
4158
4159 /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
4160 if (dev->revision == 0x07) {
4161 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4162 dmar_map_gfx = 0;
4163 }
9af88143
DW
4164}
4165
4166DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
e0fc7e0b 4167
eecfd57f
AJ
4168#define GGC 0x52
4169#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4170#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4171#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4172#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4173#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4174#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4175#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4176#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4177
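/*
 * If the BIOS has not set the VT-enable bit in the GGC register, no shadow
 * GTT has been allocated, so IOMMU translation is disabled for the
 * integrated graphics device.  Otherwise batched IOTLB flushing is turned
 * off (intel_iommu_strict) so the gfx device is idle before each flush.
 */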
9eecabcb
DW
4178static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4179{
4180 unsigned short ggc;
4181
eecfd57f 4182 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4183 return;
4184
eecfd57f 4185 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4186 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4187 dmar_map_gfx = 0;
6fbcfb3e
DW
4188 } else if (dmar_map_gfx) {
4189 /* we have to ensure the gfx device is idle before we flush */
4190 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4191 intel_iommu_strict = 1;
4192 }
9eecabcb
DW
4193}
4194DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4195DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4196DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4197DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4198
e0fc7e0b
DW
4199/* On Tylersburg chipsets, some BIOSes have been known to enable the
4200 ISOCH DMAR unit for the Azalia sound device, but not give it any
4201 TLB entries, which causes it to deadlock. Check for that. We do
4202 this in a function called from init_dmars(), instead of in a PCI
4203 quirk, because we don't want to print the obnoxious "BIOS broken"
4204 message if VT-d is actually disabled.
4205*/
4206static void __init check_tylersburg_isoch(void)
4207{
4208 struct pci_dev *pdev;
4209 uint32_t vtisochctrl;
4210
4211 /* If there's no Azalia in the system anyway, forget it. */
4212 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4213 if (!pdev)
4214 return;
4215 pci_dev_put(pdev);
4216
4217 /* System Management Registers. Might be hidden, in which case
4218 we can't do the sanity check. But that's OK, because the
4219 known-broken BIOSes _don't_ actually hide it, so far. */
4220 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4221 if (!pdev)
4222 return;
4223
4224 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4225 pci_dev_put(pdev);
4226 return;
4227 }
4228
4229 pci_dev_put(pdev);
4230
4231 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4232 if (vtisochctrl & 1)
4233 return;
4234
4235 /* Drop all bits other than the number of TLB entries */
4236 vtisochctrl &= 0x1c;
4237
4238 /* If we have the recommended number of TLB entries (16), fine. */
4239 if (vtisochctrl == 0x10)
4240 return;
4241
4242 /* Zero TLB entries? You get to ride the short bus to school. */
4243 if (!vtisochctrl) {
4244 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4245 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4246 dmi_get_system_info(DMI_BIOS_VENDOR),
4247 dmi_get_system_info(DMI_BIOS_VERSION),
4248 dmi_get_system_info(DMI_PRODUCT_VERSION));
4249 iommu_identity_mapping |= IDENTMAP_AZALIA;
4250 return;
4251 }
4252
4253 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4254 vtisochctrl);
4255}