Linux 3.9-rc5
drivers/iommu/intel-iommu.c
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
54485c30 27#include <linux/export.h>
ba395927
KA
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
ba395927
KA
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
5e0d2a6f 36#include <linux/timer.h>
38717946 37#include <linux/iova.h>
5d450806 38#include <linux/iommu.h>
38717946 39#include <linux/intel-iommu.h>
134fac3f 40#include <linux/syscore_ops.h>
69575d38 41#include <linux/tboot.h>
adb2fe02 42#include <linux/dmi.h>
5cdede24 43#include <linux/pci-ats.h>
0ee332c1 44#include <linux/memblock.h>
8a8f422d 45#include <asm/irq_remapping.h>
ba395927 46#include <asm/cacheflush.h>
46a7fa27 47#include <asm/iommu.h>
ba395927 48
078e1ee2
JR
49#include "irq_remapping.h"
50
5b6985ce
FY
51#define ROOT_SIZE VTD_PAGE_SIZE
52#define CONTEXT_SIZE VTD_PAGE_SIZE
53
ba395927
KA
54#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
55#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 56#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
57
58#define IOAPIC_RANGE_START (0xfee00000)
59#define IOAPIC_RANGE_END (0xfeefffff)
60#define IOVA_START_ADDR (0x1000)
61
62#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
63
4ed0d3e6
FY
64#define MAX_AGAW_WIDTH 64
65
2ebe3151
DW
66#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
67#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
68
69/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
70 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
71#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
72 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
73#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
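/*
 * Worked example (editor's illustration, not part of the original source):
 * with the default 48-bit guest address width and VTD_PAGE_SHIFT == 12,
 * __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 and __DOMAIN_MAX_ADDR(48) ==
 * (1ULL << 48) - 1.  On a 32-bit kernel DOMAIN_MAX_PFN(48) is clamped to
 * (unsigned long)-1, so PFN arithmetic still fits in an unsigned long.
 */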
ba395927 74
f27be03b 75#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 76#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 77#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 78
df08cdc7
AM
79/* page table handling */
80#define LEVEL_STRIDE (9)
81#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
82
6d1c56a9
OBC
83/*
 84 * This bitmap is used to advertise the page sizes our hardware supports
85 * to the IOMMU core, which will then use this information to split
86 * physically contiguous memory regions it is mapping into page sizes
87 * that we support.
88 *
89 * Traditionally the IOMMU core just handed us the mappings directly,
 90 * after making sure the size is a power-of-two multiple of 4KiB and that the
91 * mapping has natural alignment.
92 *
93 * To retain this behavior, we currently advertise that we support
 94 * all page sizes that are a power-of-two multiple of 4KiB.
95 *
96 * If at some point we'd like to utilize the IOMMU core's new behavior,
97 * we could change this to advertise the real page sizes we support.
98 */
99#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
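/*
 * Editor's illustration (assumes VTD_PAGE_SHIFT == 12): ~0xFFFUL has every
 * bit from bit 12 upwards set, so this bitmap advertises 4KiB, 8KiB, 16KiB
 * and every larger power-of-two size, matching the traditional behaviour
 * described above.
 */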
100
df08cdc7
AM
101static inline int agaw_to_level(int agaw)
102{
103 return agaw + 2;
104}
105
106static inline int agaw_to_width(int agaw)
107{
108 return 30 + agaw * LEVEL_STRIDE;
109}
110
111static inline int width_to_agaw(int width)
112{
113 return (width - 30) / LEVEL_STRIDE;
114}
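/*
 * Worked example (editor's note, not part of the original source): a 48-bit
 * address width gives width_to_agaw(48) == (48 - 30) / 9 == 2,
 * agaw_to_level(2) == 4 (a four-level page table) and agaw_to_width(2) == 48
 * again; a 39-bit width maps to agaw 1, i.e. a three-level table.
 */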
115
116static inline unsigned int level_to_offset_bits(int level)
117{
118 return (level - 1) * LEVEL_STRIDE;
119}
120
121static inline int pfn_level_offset(unsigned long pfn, int level)
122{
123 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
124}
125
126static inline unsigned long level_mask(int level)
127{
128 return -1UL << level_to_offset_bits(level);
129}
130
131static inline unsigned long level_size(int level)
132{
133 return 1UL << level_to_offset_bits(level);
134}
135
136static inline unsigned long align_to_level(unsigned long pfn, int level)
137{
138 return (pfn + level_size(level) - 1) & level_mask(level);
139}
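/*
 * Worked example (editor's note): for DMA pfn 0x12345, pfn_level_offset()
 * yields index 0x145 at level 1, 0x091 at level 2 and 0x000 at level 3,
 * i.e. successive 9-bit slices of the pfn.  align_to_level(0x12345, 2)
 * rounds up to the next 512-pfn (2MiB) boundary, 0x12400.
 */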
fd18de50 140
6dd9a7c7
YS
141static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
142{
143 return 1 << ((lvl - 1) * LEVEL_STRIDE);
144}
145
dd4e8319
DW
146/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
147 are never going to work. */
148static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
149{
150 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
151}
152
153static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
154{
155 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
156}
157static inline unsigned long page_to_dma_pfn(struct page *pg)
158{
159 return mm_to_dma_pfn(page_to_pfn(pg));
160}
161static inline unsigned long virt_to_dma_pfn(void *p)
162{
163 return page_to_dma_pfn(virt_to_page(p));
164}
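/*
 * Editor's note: on x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so these
 * conversions are identity operations.  On an architecture with 64KiB MM
 * pages the shift would be 4 and one MM pfn would span 16 VT-d pfns, which
 * only works because VT-d pages are never larger than MM pages.
 */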
165
d9630fe9
WH
166/* global iommu list, set NULL for ignored DMAR units */
167static struct intel_iommu **g_iommus;
168
e0fc7e0b 169static void __init check_tylersburg_isoch(void);
9af88143
DW
170static int rwbf_quirk;
171
b779260b
JC
172/*
 173 * set to 1 to panic the kernel if VT-d can't be enabled successfully
 174 * (used when the kernel is launched with TXT)
175 */
176static int force_on = 0;
177
46b08e1a
MM
178/*
179 * 0: Present
180 * 1-11: Reserved
181 * 12-63: Context Ptr (12 - (haw-1))
182 * 64-127: Reserved
183 */
184struct root_entry {
185 u64 val;
186 u64 rsvd1;
187};
188#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
189static inline bool root_present(struct root_entry *root)
190{
191 return (root->val & 1);
192}
193static inline void set_root_present(struct root_entry *root)
194{
195 root->val |= 1;
196}
197static inline void set_root_value(struct root_entry *root, unsigned long value)
198{
199 root->val |= value & VTD_PAGE_MASK;
200}
201
202static inline struct context_entry *
203get_context_addr_from_root(struct root_entry *root)
204{
205 return (struct context_entry *)
206 (root_present(root)?phys_to_virt(
207 root->val & VTD_PAGE_MASK) :
208 NULL);
209}
210
7a8fc25e
MM
211/*
212 * low 64 bits:
213 * 0: present
214 * 1: fault processing disable
215 * 2-3: translation type
216 * 12-63: address space root
217 * high 64 bits:
218 * 0-2: address width
219 * 3-6: aval
220 * 8-23: domain id
221 */
222struct context_entry {
223 u64 lo;
224 u64 hi;
225};
c07e7d21
MM
226
227static inline bool context_present(struct context_entry *context)
228{
229 return (context->lo & 1);
230}
231static inline void context_set_present(struct context_entry *context)
232{
233 context->lo |= 1;
234}
235
236static inline void context_set_fault_enable(struct context_entry *context)
237{
238 context->lo &= (((u64)-1) << 2) | 1;
239}
240
c07e7d21
MM
241static inline void context_set_translation_type(struct context_entry *context,
242 unsigned long value)
243{
244 context->lo &= (((u64)-1) << 4) | 3;
245 context->lo |= (value & 3) << 2;
246}
247
248static inline void context_set_address_root(struct context_entry *context,
249 unsigned long value)
250{
251 context->lo |= value & VTD_PAGE_MASK;
252}
253
254static inline void context_set_address_width(struct context_entry *context,
255 unsigned long value)
256{
257 context->hi |= value & 7;
258}
259
260static inline void context_set_domain_id(struct context_entry *context,
261 unsigned long value)
262{
263 context->hi |= (value & ((1 << 16) - 1)) << 8;
264}
265
266static inline void context_clear_entry(struct context_entry *context)
267{
268 context->lo = 0;
269 context->hi = 0;
270}
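/*
 * Worked example (editor's sketch, not executed anywhere): building a
 * multi-level context entry for domain 5 with agaw 2 and a page-table root
 * at physical address 0x1234000, for some struct context_entry *ctx, via
 *
 *	context_set_domain_id(ctx, 5);
 *	context_set_address_width(ctx, 2);
 *	context_set_address_root(ctx, 0x1234000);
 *	context_set_translation_type(ctx, 0);
 *	context_set_fault_enable(ctx);
 *	context_set_present(ctx);
 *
 * ends with ctx->lo == 0x1234001 and ctx->hi == 0x502, matching the bit
 * layout documented above (translation type 0 is the multi-level case).
 */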
7a8fc25e 271
622ba12a
MM
272/*
273 * 0: readable
274 * 1: writable
275 * 2-6: reserved
276 * 7: super page
9cf06697
SY
277 * 8-10: available
278 * 11: snoop behavior
622ba12a
MM
 279 * 12-63: Host physical address
280 */
281struct dma_pte {
282 u64 val;
283};
622ba12a 284
19c239ce
MM
285static inline void dma_clear_pte(struct dma_pte *pte)
286{
287 pte->val = 0;
288}
289
290static inline void dma_set_pte_readable(struct dma_pte *pte)
291{
292 pte->val |= DMA_PTE_READ;
293}
294
295static inline void dma_set_pte_writable(struct dma_pte *pte)
296{
297 pte->val |= DMA_PTE_WRITE;
298}
299
9cf06697
SY
300static inline void dma_set_pte_snp(struct dma_pte *pte)
301{
302 pte->val |= DMA_PTE_SNP;
303}
304
19c239ce
MM
305static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
306{
307 pte->val = (pte->val & ~3) | (prot & 3);
308}
309
310static inline u64 dma_pte_addr(struct dma_pte *pte)
311{
c85994e4
DW
312#ifdef CONFIG_64BIT
313 return pte->val & VTD_PAGE_MASK;
314#else
315 /* Must have a full atomic 64-bit read */
1a8bd481 316 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 317#endif
19c239ce
MM
318}
319
dd4e8319 320static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 321{
dd4e8319 322 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
19c239ce
MM
323}
324
325static inline bool dma_pte_present(struct dma_pte *pte)
326{
327 return (pte->val & 3) != 0;
328}
622ba12a 329
4399c8bf
AK
330static inline bool dma_pte_superpage(struct dma_pte *pte)
331{
332 return (pte->val & (1 << 7));
333}
334
75e6bf96
DW
335static inline int first_pte_in_page(struct dma_pte *pte)
336{
337 return !((unsigned long)pte & ~VTD_PAGE_MASK);
338}
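/*
 * Editor's illustration: a pte->val of 0x00000000abcde003 decodes, per the
 * layout above, as readable and writable (bits 0-1 set) with the superpage
 * bit (7) clear, so dma_pte_present() is true, dma_pte_superpage() is false
 * and dma_pte_addr() returns host physical address 0xabcde000.
 */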
339
2c2e2c38
FY
340/*
 341 * This domain is a static identity mapping domain.
 342 * 1. This domain creates a static 1:1 mapping to all usable memory.
 343 * 2. It maps to each iommu if successful.
 344 * 3. Each iommu maps to this domain if successful.
345 */
19943b0e
DW
346static struct dmar_domain *si_domain;
347static int hw_pass_through = 1;
2c2e2c38 348
3b5410e7 349/* devices under the same p2p bridge are owned in one domain */
cdc7b837 350#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 351
1ce28feb
WH
 352/* domain represents a virtual machine; more than one device
 353 * across iommus may be owned by one domain, e.g. a kvm guest.
354 */
355#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
356
2c2e2c38
FY
 357/* si_domain contains multiple devices */
358#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
359
1b198bb0
MT
360/* define the limit of IOMMUs supported in each domain */
361#ifdef CONFIG_X86
362# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
363#else
364# define IOMMU_UNITS_SUPPORTED 64
365#endif
366
99126f7c
MM
367struct dmar_domain {
368 int id; /* domain id */
4c923d47 369 int nid; /* node id */
1b198bb0
MT
370 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
371 /* bitmap of iommus this domain uses*/
99126f7c
MM
372
373 struct list_head devices; /* all devices' list */
374 struct iova_domain iovad; /* iova's that belong to this domain */
375
376 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
377 int gaw; /* max guest address width */
378
379 /* adjusted guest address width, 0 is level 2 30-bit */
380 int agaw;
381
3b5410e7 382 int flags; /* flags to find out type of domain */
8e604097
WH
383
384 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 385 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 386 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
387 int iommu_superpage;/* Level of superpages supported:
388 0 == 4KiB (no superpages), 1 == 2MiB,
389 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 390 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 391 u64 max_addr; /* maximum mapped address */
99126f7c
MM
392};
393
a647dacb
MM
394/* PCI domain-device relationship */
395struct device_domain_info {
396 struct list_head link; /* link to domain siblings */
397 struct list_head global; /* link to global list */
276dbf99
DW
398 int segment; /* PCI domain */
399 u8 bus; /* PCI bus number */
a647dacb 400 u8 devfn; /* PCI devfn number */
45e829ea 401 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 402 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
403 struct dmar_domain *domain; /* pointer to domain */
404};
405
5e0d2a6f 406static void flush_unmaps_timeout(unsigned long data);
407
408DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
409
80b20dd8 410#define HIGH_WATER_MARK 250
411struct deferred_flush_tables {
412 int next;
413 struct iova *iova[HIGH_WATER_MARK];
414 struct dmar_domain *domain[HIGH_WATER_MARK];
415};
416
417static struct deferred_flush_tables *deferred_flush;
418
5e0d2a6f 419/* bitmap for indexing intel_iommus */
5e0d2a6f 420static int g_num_of_iommus;
421
422static DEFINE_SPINLOCK(async_umap_flush_lock);
423static LIST_HEAD(unmaps_to_do);
424
425static int timer_on;
426static long list_size;
5e0d2a6f 427
ba395927
KA
428static void domain_remove_dev_info(struct dmar_domain *domain);
429
d3f13810 430#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
431int dmar_disabled = 0;
432#else
433int dmar_disabled = 1;
d3f13810 434#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 435
8bc1f85c
ED
436int intel_iommu_enabled = 0;
437EXPORT_SYMBOL_GPL(intel_iommu_enabled);
438
2d9e667e 439static int dmar_map_gfx = 1;
7d3b03ce 440static int dmar_forcedac;
5e0d2a6f 441static int intel_iommu_strict;
6dd9a7c7 442static int intel_iommu_superpage = 1;
ba395927 443
c0771df8
DW
444int intel_iommu_gfx_mapped;
445EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
446
ba395927
KA
447#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
448static DEFINE_SPINLOCK(device_domain_lock);
449static LIST_HEAD(device_domain_list);
450
a8bcbb0d
JR
451static struct iommu_ops intel_iommu_ops;
452
ba395927
KA
453static int __init intel_iommu_setup(char *str)
454{
455 if (!str)
456 return -EINVAL;
457 while (*str) {
0cd5c3c8
KM
458 if (!strncmp(str, "on", 2)) {
459 dmar_disabled = 0;
460 printk(KERN_INFO "Intel-IOMMU: enabled\n");
461 } else if (!strncmp(str, "off", 3)) {
ba395927 462 dmar_disabled = 1;
0cd5c3c8 463 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
464 } else if (!strncmp(str, "igfx_off", 8)) {
465 dmar_map_gfx = 0;
466 printk(KERN_INFO
467 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 468 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 469 printk(KERN_INFO
7d3b03ce
KA
470 "Intel-IOMMU: Forcing DAC for PCI devices\n");
471 dmar_forcedac = 1;
5e0d2a6f 472 } else if (!strncmp(str, "strict", 6)) {
473 printk(KERN_INFO
474 "Intel-IOMMU: disable batched IOTLB flush\n");
475 intel_iommu_strict = 1;
6dd9a7c7
YS
476 } else if (!strncmp(str, "sp_off", 6)) {
477 printk(KERN_INFO
478 "Intel-IOMMU: disable supported super page\n");
479 intel_iommu_superpage = 0;
ba395927
KA
480 }
481
482 str += strcspn(str, ",");
483 while (*str == ',')
484 str++;
485 }
486 return 0;
487}
488__setup("intel_iommu=", intel_iommu_setup);
489
490static struct kmem_cache *iommu_domain_cache;
491static struct kmem_cache *iommu_devinfo_cache;
492static struct kmem_cache *iommu_iova_cache;
493
4c923d47 494static inline void *alloc_pgtable_page(int node)
eb3fa7cb 495{
4c923d47
SS
496 struct page *page;
497 void *vaddr = NULL;
eb3fa7cb 498
4c923d47
SS
499 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
500 if (page)
501 vaddr = page_address(page);
eb3fa7cb 502 return vaddr;
ba395927
KA
503}
504
505static inline void free_pgtable_page(void *vaddr)
506{
507 free_page((unsigned long)vaddr);
508}
509
510static inline void *alloc_domain_mem(void)
511{
354bb65e 512 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
513}
514
38717946 515static void free_domain_mem(void *vaddr)
ba395927
KA
516{
517 kmem_cache_free(iommu_domain_cache, vaddr);
518}
519
520static inline void * alloc_devinfo_mem(void)
521{
354bb65e 522 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
523}
524
525static inline void free_devinfo_mem(void *vaddr)
526{
527 kmem_cache_free(iommu_devinfo_cache, vaddr);
528}
529
530struct iova *alloc_iova_mem(void)
531{
354bb65e 532 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
533}
534
535void free_iova_mem(struct iova *iova)
536{
537 kmem_cache_free(iommu_iova_cache, iova);
538}
539
1b573683 540
4ed0d3e6 541static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
542{
543 unsigned long sagaw;
544 int agaw = -1;
545
546 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 547 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
548 agaw >= 0; agaw--) {
549 if (test_bit(agaw, &sagaw))
550 break;
551 }
552
553 return agaw;
554}
555
4ed0d3e6
FY
556/*
557 * Calculate max SAGAW for each iommu.
558 */
559int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
560{
561 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
562}
563
564/*
 565 * Calculate agaw for each iommu.
 566 * "SAGAW" may be different across iommus, so use a default agaw and fall
 567 * back to a smaller supported agaw for iommus that don't support the default.
568 */
569int iommu_calculate_agaw(struct intel_iommu *iommu)
570{
571 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
572}
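/*
 * Worked example (editor's note): DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so
 * __iommu_calculate_agaw() starts probing at agaw 2.  If cap_sagaw() only
 * has bit 1 set (39-bit, three-level tables) the loop falls back to agaw 1;
 * if no supported agaw at or below the requested one exists it returns -1.
 */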
573
2c2e2c38 574/* This function only returns a single iommu in a domain */
8c11e798
WH
575static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
576{
577 int iommu_id;
578
2c2e2c38 579 /* si_domain and vm domain should not get here. */
1ce28feb 580 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 581 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 582
1b198bb0 583 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
584 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
585 return NULL;
586
587 return g_iommus[iommu_id];
588}
589
8e604097
WH
590static void domain_update_iommu_coherency(struct dmar_domain *domain)
591{
592 int i;
593
2e12bc29
AW
594 i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
595
596 domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
8e604097 597
1b198bb0 598 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
8e604097
WH
599 if (!ecap_coherent(g_iommus[i]->ecap)) {
600 domain->iommu_coherency = 0;
601 break;
602 }
8e604097
WH
603 }
604}
605
58c610bd
SY
606static void domain_update_iommu_snooping(struct dmar_domain *domain)
607{
608 int i;
609
610 domain->iommu_snooping = 1;
611
1b198bb0 612 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
613 if (!ecap_sc_support(g_iommus[i]->ecap)) {
614 domain->iommu_snooping = 0;
615 break;
616 }
58c610bd
SY
617 }
618}
619
6dd9a7c7
YS
620static void domain_update_iommu_superpage(struct dmar_domain *domain)
621{
8140a95d
AK
622 struct dmar_drhd_unit *drhd;
623 struct intel_iommu *iommu = NULL;
624 int mask = 0xf;
6dd9a7c7
YS
625
626 if (!intel_iommu_superpage) {
627 domain->iommu_superpage = 0;
628 return;
629 }
630
8140a95d
AK
631 /* set iommu_superpage to the smallest common denominator */
632 for_each_active_iommu(iommu, drhd) {
633 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
634 if (!mask) {
635 break;
636 }
637 }
638 domain->iommu_superpage = fls(mask);
639}
640
58c610bd
SY
641/* Some capabilities may be different across iommus */
642static void domain_update_iommu_cap(struct dmar_domain *domain)
643{
644 domain_update_iommu_coherency(domain);
645 domain_update_iommu_snooping(domain);
6dd9a7c7 646 domain_update_iommu_superpage(domain);
58c610bd
SY
647}
648
276dbf99 649static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
650{
651 struct dmar_drhd_unit *drhd = NULL;
652 int i;
653
654 for_each_drhd_unit(drhd) {
655 if (drhd->ignored)
656 continue;
276dbf99
DW
657 if (segment != drhd->segment)
658 continue;
c7151a8d 659
924b6231 660 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
661 if (drhd->devices[i] &&
662 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
663 drhd->devices[i]->devfn == devfn)
664 return drhd->iommu;
4958c5dc
DW
665 if (drhd->devices[i] &&
666 drhd->devices[i]->subordinate &&
924b6231 667 drhd->devices[i]->subordinate->number <= bus &&
b918c62e 668 drhd->devices[i]->subordinate->busn_res.end >= bus)
924b6231
DW
669 return drhd->iommu;
670 }
c7151a8d
WH
671
672 if (drhd->include_all)
673 return drhd->iommu;
674 }
675
676 return NULL;
677}
678
5331fe6f
WH
679static void domain_flush_cache(struct dmar_domain *domain,
680 void *addr, int size)
681{
682 if (!domain->iommu_coherency)
683 clflush_cache_range(addr, size);
684}
685
ba395927
KA
686/* Gets context entry for a given bus and devfn */
687static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
688 u8 bus, u8 devfn)
689{
690 struct root_entry *root;
691 struct context_entry *context;
692 unsigned long phy_addr;
693 unsigned long flags;
694
695 spin_lock_irqsave(&iommu->lock, flags);
696 root = &iommu->root_entry[bus];
697 context = get_context_addr_from_root(root);
698 if (!context) {
4c923d47
SS
699 context = (struct context_entry *)
700 alloc_pgtable_page(iommu->node);
ba395927
KA
701 if (!context) {
702 spin_unlock_irqrestore(&iommu->lock, flags);
703 return NULL;
704 }
5b6985ce 705 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
706 phy_addr = virt_to_phys((void *)context);
707 set_root_value(root, phy_addr);
708 set_root_present(root);
709 __iommu_flush_cache(iommu, root, sizeof(*root));
710 }
711 spin_unlock_irqrestore(&iommu->lock, flags);
712 return &context[devfn];
713}
714
715static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
716{
717 struct root_entry *root;
718 struct context_entry *context;
719 int ret;
720 unsigned long flags;
721
722 spin_lock_irqsave(&iommu->lock, flags);
723 root = &iommu->root_entry[bus];
724 context = get_context_addr_from_root(root);
725 if (!context) {
726 ret = 0;
727 goto out;
728 }
c07e7d21 729 ret = context_present(&context[devfn]);
ba395927
KA
730out:
731 spin_unlock_irqrestore(&iommu->lock, flags);
732 return ret;
733}
734
735static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
736{
737 struct root_entry *root;
738 struct context_entry *context;
739 unsigned long flags;
740
741 spin_lock_irqsave(&iommu->lock, flags);
742 root = &iommu->root_entry[bus];
743 context = get_context_addr_from_root(root);
744 if (context) {
c07e7d21 745 context_clear_entry(&context[devfn]);
ba395927
KA
746 __iommu_flush_cache(iommu, &context[devfn], \
747 sizeof(*context));
748 }
749 spin_unlock_irqrestore(&iommu->lock, flags);
750}
751
752static void free_context_table(struct intel_iommu *iommu)
753{
754 struct root_entry *root;
755 int i;
756 unsigned long flags;
757 struct context_entry *context;
758
759 spin_lock_irqsave(&iommu->lock, flags);
760 if (!iommu->root_entry) {
761 goto out;
762 }
763 for (i = 0; i < ROOT_ENTRY_NR; i++) {
764 root = &iommu->root_entry[i];
765 context = get_context_addr_from_root(root);
766 if (context)
767 free_pgtable_page(context);
768 }
769 free_pgtable_page(iommu->root_entry);
770 iommu->root_entry = NULL;
771out:
772 spin_unlock_irqrestore(&iommu->lock, flags);
773}
774
b026fd28 775static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
4399c8bf 776 unsigned long pfn, int target_level)
ba395927 777{
b026fd28 778 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
779 struct dma_pte *parent, *pte = NULL;
780 int level = agaw_to_level(domain->agaw);
4399c8bf 781 int offset;
ba395927
KA
782
783 BUG_ON(!domain->pgd);
b026fd28 784 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
785 parent = domain->pgd;
786
ba395927
KA
787 while (level > 0) {
788 void *tmp_page;
789
b026fd28 790 offset = pfn_level_offset(pfn, level);
ba395927 791 pte = &parent[offset];
4399c8bf 792 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7
YS
793 break;
794 if (level == target_level)
ba395927
KA
795 break;
796
19c239ce 797 if (!dma_pte_present(pte)) {
c85994e4
DW
798 uint64_t pteval;
799
4c923d47 800 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 801
206a73c1 802 if (!tmp_page)
ba395927 803 return NULL;
206a73c1 804
c85994e4 805 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 806 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
807 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
808 /* Someone else set it while we were thinking; use theirs. */
809 free_pgtable_page(tmp_page);
810 } else {
811 dma_pte_addr(pte);
812 domain_flush_cache(domain, pte, sizeof(*pte));
813 }
ba395927 814 }
19c239ce 815 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
816 level--;
817 }
818
ba395927
KA
819 return pte;
820}
821
6dd9a7c7 822
ba395927 823/* return address's pte at specific level */
90dcfb5e
DW
824static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
825 unsigned long pfn,
6dd9a7c7 826 int level, int *large_page)
ba395927
KA
827{
828 struct dma_pte *parent, *pte = NULL;
829 int total = agaw_to_level(domain->agaw);
830 int offset;
831
832 parent = domain->pgd;
833 while (level <= total) {
90dcfb5e 834 offset = pfn_level_offset(pfn, total);
ba395927
KA
835 pte = &parent[offset];
836 if (level == total)
837 return pte;
838
6dd9a7c7
YS
839 if (!dma_pte_present(pte)) {
840 *large_page = total;
ba395927 841 break;
6dd9a7c7
YS
842 }
843
844 if (pte->val & DMA_PTE_LARGE_PAGE) {
845 *large_page = total;
846 return pte;
847 }
848
19c239ce 849 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
850 total--;
851 }
852 return NULL;
853}
854
ba395927 855/* clear last level pte, a tlb flush should follow */
292827cb 856static int dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
857 unsigned long start_pfn,
858 unsigned long last_pfn)
ba395927 859{
04b18e65 860 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 861 unsigned int large_page = 1;
310a5ab9 862 struct dma_pte *first_pte, *pte;
292827cb 863 int order;
66eae846 864
04b18e65 865 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 866 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 867 BUG_ON(start_pfn > last_pfn);
ba395927 868
04b18e65 869 /* we don't need lock here; nobody else touches the iova range */
59c36286 870 do {
6dd9a7c7
YS
871 large_page = 1;
872 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 873 if (!pte) {
6dd9a7c7 874 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
875 continue;
876 }
6dd9a7c7 877 do {
310a5ab9 878 dma_clear_pte(pte);
6dd9a7c7 879 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 880 pte++;
75e6bf96
DW
881 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
882
310a5ab9
DW
883 domain_flush_cache(domain, first_pte,
884 (void *)pte - (void *)first_pte);
59c36286
DW
885
886 } while (start_pfn && start_pfn <= last_pfn);
292827cb
AK
887
888 order = (large_page - 1) * 9;
889 return order;
ba395927
KA
890}
891
892/* free page table pages. last level pte should already be cleared */
893static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
894 unsigned long start_pfn,
895 unsigned long last_pfn)
ba395927 896{
6660c63a 897 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 898 struct dma_pte *first_pte, *pte;
ba395927
KA
899 int total = agaw_to_level(domain->agaw);
900 int level;
6660c63a 901 unsigned long tmp;
6dd9a7c7 902 int large_page = 2;
ba395927 903
6660c63a
DW
904 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
905 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 906 BUG_ON(start_pfn > last_pfn);
ba395927 907
f3a0a52f 908 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
909 level = 2;
910 while (level <= total) {
6660c63a
DW
911 tmp = align_to_level(start_pfn, level);
912
f3a0a52f 913 /* If we can't even clear one PTE at this level, we're done */
6660c63a 914 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
915 return;
916
59c36286 917 do {
6dd9a7c7
YS
918 large_page = level;
919 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
920 if (large_page > level)
921 level = large_page + 1;
f3a0a52f
DW
922 if (!pte) {
923 tmp = align_to_level(tmp + 1, level + 1);
924 continue;
925 }
75e6bf96 926 do {
6a43e574
DW
927 if (dma_pte_present(pte)) {
928 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
929 dma_clear_pte(pte);
930 }
f3a0a52f
DW
931 pte++;
932 tmp += level_size(level);
75e6bf96
DW
933 } while (!first_pte_in_page(pte) &&
934 tmp + level_size(level) - 1 <= last_pfn);
935
f3a0a52f
DW
936 domain_flush_cache(domain, first_pte,
937 (void *)pte - (void *)first_pte);
938
59c36286 939 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
940 level++;
941 }
942 /* free pgd */
d794dc9b 943 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
944 free_pgtable_page(domain->pgd);
945 domain->pgd = NULL;
946 }
947}
948
949/* iommu handling */
950static int iommu_alloc_root_entry(struct intel_iommu *iommu)
951{
952 struct root_entry *root;
953 unsigned long flags;
954
4c923d47 955 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
956 if (!root)
957 return -ENOMEM;
958
5b6985ce 959 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
960
961 spin_lock_irqsave(&iommu->lock, flags);
962 iommu->root_entry = root;
963 spin_unlock_irqrestore(&iommu->lock, flags);
964
965 return 0;
966}
967
ba395927
KA
968static void iommu_set_root_entry(struct intel_iommu *iommu)
969{
970 void *addr;
c416daa9 971 u32 sts;
ba395927
KA
972 unsigned long flag;
973
974 addr = iommu->root_entry;
975
1f5b3c3f 976 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
977 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
978
c416daa9 979 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
980
981 /* Make sure hardware complete it */
982 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 983 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 984
1f5b3c3f 985 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
986}
987
988static void iommu_flush_write_buffer(struct intel_iommu *iommu)
989{
990 u32 val;
991 unsigned long flag;
992
9af88143 993 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 994 return;
ba395927 995
1f5b3c3f 996 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 997 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
998
999 /* Make sure hardware complete it */
1000 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1001 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1002
1f5b3c3f 1003 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1004}
1005
 1006/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1007static void __iommu_flush_context(struct intel_iommu *iommu,
1008 u16 did, u16 source_id, u8 function_mask,
1009 u64 type)
ba395927
KA
1010{
1011 u64 val = 0;
1012 unsigned long flag;
1013
ba395927
KA
1014 switch (type) {
1015 case DMA_CCMD_GLOBAL_INVL:
1016 val = DMA_CCMD_GLOBAL_INVL;
1017 break;
1018 case DMA_CCMD_DOMAIN_INVL:
1019 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1020 break;
1021 case DMA_CCMD_DEVICE_INVL:
1022 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1023 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1024 break;
1025 default:
1026 BUG();
1027 }
1028 val |= DMA_CCMD_ICC;
1029
1f5b3c3f 1030 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1031 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1032
1033 /* Make sure hardware complete it */
1034 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1035 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1036
1f5b3c3f 1037 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1038}
1039
ba395927 1040/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1041static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1042 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1043{
1044 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1045 u64 val = 0, val_iva = 0;
1046 unsigned long flag;
1047
ba395927
KA
1048 switch (type) {
1049 case DMA_TLB_GLOBAL_FLUSH:
 1050 /* global flush doesn't need to set IVA_REG */
1051 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1052 break;
1053 case DMA_TLB_DSI_FLUSH:
1054 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1055 break;
1056 case DMA_TLB_PSI_FLUSH:
1057 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1058 /* Note: always flush non-leaf currently */
1059 val_iva = size_order | addr;
1060 break;
1061 default:
1062 BUG();
1063 }
1064 /* Note: set drain read/write */
1065#if 0
1066 /*
 1067 * This is probably meant to be extra secure; it looks like we can
 1068 * ignore it without any impact.
1069 */
1070 if (cap_read_drain(iommu->cap))
1071 val |= DMA_TLB_READ_DRAIN;
1072#endif
1073 if (cap_write_drain(iommu->cap))
1074 val |= DMA_TLB_WRITE_DRAIN;
1075
1f5b3c3f 1076 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1077 /* Note: Only uses first TLB reg currently */
1078 if (val_iva)
1079 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1080 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1081
1082 /* Make sure hardware complete it */
1083 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1084 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1085
1f5b3c3f 1086 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1087
1088 /* check IOTLB invalidation granularity */
1089 if (DMA_TLB_IAIG(val) == 0)
1090 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1091 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1092 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1093 (unsigned long long)DMA_TLB_IIRG(type),
1094 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1095}
1096
93a23a72
YZ
1097static struct device_domain_info *iommu_support_dev_iotlb(
1098 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1099{
1100 int found = 0;
1101 unsigned long flags;
1102 struct device_domain_info *info;
1103 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1104
1105 if (!ecap_dev_iotlb_support(iommu->ecap))
1106 return NULL;
1107
1108 if (!iommu->qi)
1109 return NULL;
1110
1111 spin_lock_irqsave(&device_domain_lock, flags);
1112 list_for_each_entry(info, &domain->devices, link)
1113 if (info->bus == bus && info->devfn == devfn) {
1114 found = 1;
1115 break;
1116 }
1117 spin_unlock_irqrestore(&device_domain_lock, flags);
1118
1119 if (!found || !info->dev)
1120 return NULL;
1121
1122 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1123 return NULL;
1124
1125 if (!dmar_find_matched_atsr_unit(info->dev))
1126 return NULL;
1127
1128 info->iommu = iommu;
1129
1130 return info;
1131}
1132
1133static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1134{
93a23a72
YZ
1135 if (!info)
1136 return;
1137
1138 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1139}
1140
1141static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1142{
1143 if (!info->dev || !pci_ats_enabled(info->dev))
1144 return;
1145
1146 pci_disable_ats(info->dev);
1147}
1148
1149static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1150 u64 addr, unsigned mask)
1151{
1152 u16 sid, qdep;
1153 unsigned long flags;
1154 struct device_domain_info *info;
1155
1156 spin_lock_irqsave(&device_domain_lock, flags);
1157 list_for_each_entry(info, &domain->devices, link) {
1158 if (!info->dev || !pci_ats_enabled(info->dev))
1159 continue;
1160
1161 sid = info->bus << 8 | info->devfn;
1162 qdep = pci_ats_queue_depth(info->dev);
1163 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1164 }
1165 spin_unlock_irqrestore(&device_domain_lock, flags);
1166}
1167
1f0ef2aa 1168static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1169 unsigned long pfn, unsigned int pages, int map)
ba395927 1170{
9dd2fe89 1171 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1172 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1173
ba395927
KA
1174 BUG_ON(pages == 0);
1175
ba395927 1176 /*
9dd2fe89
YZ
 1177 * Fall back to domain-selective flush if there is no PSI support or the
 1178 * size is too big.
ba395927
KA
 1179 * PSI requires the page size to be 2 ^ x, and the base address to be
 1180 * naturally aligned to that size.
1181 */
9dd2fe89
YZ
1182 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1183 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1184 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1185 else
1186 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1187 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1188
1189 /*
82653633
NA
1190 * In caching mode, changes of pages from non-present to present require
1191 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1192 */
82653633 1193 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1194 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1195}
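/*
 * Worked example (editor's note): flushing 9 pages gives
 * mask = ilog2(__roundup_pow_of_two(9)) = ilog2(16) = 4, so the PSI
 * invalidation covers 2^4 = 16 pages rather than exactly 9; if that mask
 * exceeds cap_max_amask_val() (or PSI is unsupported) the code falls back
 * to a domain-selective flush instead.
 */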
1196
f8bab735 1197static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1198{
1199 u32 pmen;
1200 unsigned long flags;
1201
1f5b3c3f 1202 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1203 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1204 pmen &= ~DMA_PMEN_EPM;
1205 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1206
1207 /* wait for the protected region status bit to clear */
1208 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1209 readl, !(pmen & DMA_PMEN_PRS), pmen);
1210
1f5b3c3f 1211 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1212}
1213
ba395927
KA
1214static int iommu_enable_translation(struct intel_iommu *iommu)
1215{
1216 u32 sts;
1217 unsigned long flags;
1218
1f5b3c3f 1219 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1220 iommu->gcmd |= DMA_GCMD_TE;
1221 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1222
1223 /* Make sure hardware complete it */
1224 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1225 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1226
1f5b3c3f 1227 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1228 return 0;
1229}
1230
1231static int iommu_disable_translation(struct intel_iommu *iommu)
1232{
1233 u32 sts;
1234 unsigned long flag;
1235
1f5b3c3f 1236 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1237 iommu->gcmd &= ~DMA_GCMD_TE;
1238 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1239
1240 /* Make sure hardware complete it */
1241 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1242 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1243
1f5b3c3f 1244 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1245 return 0;
1246}
1247
3460a6d9 1248
ba395927
KA
1249static int iommu_init_domains(struct intel_iommu *iommu)
1250{
1251 unsigned long ndomains;
1252 unsigned long nlongs;
1253
1254 ndomains = cap_ndoms(iommu->cap);
68aeb968 1255 pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
680a7524 1256 ndomains);
ba395927
KA
1257 nlongs = BITS_TO_LONGS(ndomains);
1258
94a91b50
DD
1259 spin_lock_init(&iommu->lock);
1260
ba395927
KA
1261 /* TBD: there might be 64K domains,
1262 * consider other allocation for future chip
1263 */
1264 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1265 if (!iommu->domain_ids) {
1266 printk(KERN_ERR "Allocating domain id array failed\n");
1267 return -ENOMEM;
1268 }
1269 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1270 GFP_KERNEL);
1271 if (!iommu->domains) {
1272 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1273 return -ENOMEM;
1274 }
1275
1276 /*
1277 * if Caching mode is set, then invalid translations are tagged
1278 * with domainid 0. Hence we need to pre-allocate it.
1279 */
1280 if (cap_caching_mode(iommu->cap))
1281 set_bit(0, iommu->domain_ids);
1282 return 0;
1283}
ba395927 1284
ba395927
KA
1285
1286static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1287static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1288
1289void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1290{
1291 struct dmar_domain *domain;
1292 int i;
c7151a8d 1293 unsigned long flags;
ba395927 1294
94a91b50 1295 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1296 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
94a91b50
DD
1297 domain = iommu->domains[i];
1298 clear_bit(i, iommu->domain_ids);
1299
1300 spin_lock_irqsave(&domain->iommu_lock, flags);
1301 if (--domain->iommu_count == 0) {
1302 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1303 vm_domain_exit(domain);
1304 else
1305 domain_exit(domain);
1306 }
1307 spin_unlock_irqrestore(&domain->iommu_lock, flags);
5e98c4b1 1308 }
ba395927
KA
1309 }
1310
1311 if (iommu->gcmd & DMA_GCMD_TE)
1312 iommu_disable_translation(iommu);
1313
1314 if (iommu->irq) {
dced35ae 1315 irq_set_handler_data(iommu->irq, NULL);
ba395927
KA
1316 /* This will mask the irq */
1317 free_irq(iommu->irq, iommu);
1318 destroy_irq(iommu->irq);
1319 }
1320
1321 kfree(iommu->domains);
1322 kfree(iommu->domain_ids);
1323
d9630fe9
WH
1324 g_iommus[iommu->seq_id] = NULL;
1325
1326 /* if all iommus are freed, free g_iommus */
1327 for (i = 0; i < g_num_of_iommus; i++) {
1328 if (g_iommus[i])
1329 break;
1330 }
1331
1332 if (i == g_num_of_iommus)
1333 kfree(g_iommus);
1334
ba395927
KA
1335 /* free context mapping */
1336 free_context_table(iommu);
ba395927
KA
1337}
1338
2c2e2c38 1339static struct dmar_domain *alloc_domain(void)
ba395927 1340{
ba395927 1341 struct dmar_domain *domain;
ba395927
KA
1342
1343 domain = alloc_domain_mem();
1344 if (!domain)
1345 return NULL;
1346
4c923d47 1347 domain->nid = -1;
1b198bb0 1348 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
2c2e2c38
FY
1349 domain->flags = 0;
1350
1351 return domain;
1352}
1353
1354static int iommu_attach_domain(struct dmar_domain *domain,
1355 struct intel_iommu *iommu)
1356{
1357 int num;
1358 unsigned long ndomains;
1359 unsigned long flags;
1360
ba395927
KA
1361 ndomains = cap_ndoms(iommu->cap);
1362
1363 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1364
ba395927
KA
1365 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1366 if (num >= ndomains) {
1367 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1368 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1369 return -ENOMEM;
ba395927
KA
1370 }
1371
ba395927 1372 domain->id = num;
2c2e2c38 1373 set_bit(num, iommu->domain_ids);
1b198bb0 1374 set_bit(iommu->seq_id, domain->iommu_bmp);
ba395927
KA
1375 iommu->domains[num] = domain;
1376 spin_unlock_irqrestore(&iommu->lock, flags);
1377
2c2e2c38 1378 return 0;
ba395927
KA
1379}
1380
2c2e2c38
FY
1381static void iommu_detach_domain(struct dmar_domain *domain,
1382 struct intel_iommu *iommu)
ba395927
KA
1383{
1384 unsigned long flags;
2c2e2c38
FY
1385 int num, ndomains;
1386 int found = 0;
ba395927 1387
8c11e798 1388 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1389 ndomains = cap_ndoms(iommu->cap);
a45946ab 1390 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38
FY
1391 if (iommu->domains[num] == domain) {
1392 found = 1;
1393 break;
1394 }
2c2e2c38
FY
1395 }
1396
1397 if (found) {
1398 clear_bit(num, iommu->domain_ids);
1b198bb0 1399 clear_bit(iommu->seq_id, domain->iommu_bmp);
2c2e2c38
FY
1400 iommu->domains[num] = NULL;
1401 }
8c11e798 1402 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1403}
1404
1405static struct iova_domain reserved_iova_list;
8a443df4 1406static struct lock_class_key reserved_rbtree_key;
ba395927 1407
51a63e67 1408static int dmar_init_reserved_ranges(void)
ba395927
KA
1409{
1410 struct pci_dev *pdev = NULL;
1411 struct iova *iova;
1412 int i;
ba395927 1413
f661197e 1414 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1415
8a443df4
MG
1416 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1417 &reserved_rbtree_key);
1418
ba395927
KA
1419 /* IOAPIC ranges shouldn't be accessed by DMA */
1420 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1421 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1422 if (!iova) {
ba395927 1423 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1424 return -ENODEV;
1425 }
ba395927
KA
1426
1427 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1428 for_each_pci_dev(pdev) {
1429 struct resource *r;
1430
1431 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1432 r = &pdev->resource[i];
1433 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1434 continue;
1a4a4551
DW
1435 iova = reserve_iova(&reserved_iova_list,
1436 IOVA_PFN(r->start),
1437 IOVA_PFN(r->end));
51a63e67 1438 if (!iova) {
ba395927 1439 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1440 return -ENODEV;
1441 }
ba395927
KA
1442 }
1443 }
51a63e67 1444 return 0;
ba395927
KA
1445}
1446
1447static void domain_reserve_special_ranges(struct dmar_domain *domain)
1448{
1449 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1450}
1451
1452static inline int guestwidth_to_adjustwidth(int gaw)
1453{
1454 int agaw;
1455 int r = (gaw - 12) % 9;
1456
1457 if (r == 0)
1458 agaw = gaw;
1459 else
1460 agaw = gaw + 9 - r;
1461 if (agaw > 64)
1462 agaw = 64;
1463 return agaw;
1464}
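/*
 * Worked example (editor's note): guestwidth_to_adjustwidth(48) has
 * r == (48 - 12) % 9 == 0, so the width stays 48; a 35-bit guest width
 * gives r == 5 and is rounded up to 39, the next width of the form
 * 12 + 9 * n, with the result capped at 64.
 */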
1465
1466static int domain_init(struct dmar_domain *domain, int guest_width)
1467{
1468 struct intel_iommu *iommu;
1469 int adjust_width, agaw;
1470 unsigned long sagaw;
1471
f661197e 1472 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1473 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1474
1475 domain_reserve_special_ranges(domain);
1476
1477 /* calculate AGAW */
8c11e798 1478 iommu = domain_get_iommu(domain);
ba395927
KA
1479 if (guest_width > cap_mgaw(iommu->cap))
1480 guest_width = cap_mgaw(iommu->cap);
1481 domain->gaw = guest_width;
1482 adjust_width = guestwidth_to_adjustwidth(guest_width);
1483 agaw = width_to_agaw(adjust_width);
1484 sagaw = cap_sagaw(iommu->cap);
1485 if (!test_bit(agaw, &sagaw)) {
1486 /* hardware doesn't support it, choose a bigger one */
1487 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1488 agaw = find_next_bit(&sagaw, 5, agaw);
1489 if (agaw >= 5)
1490 return -ENODEV;
1491 }
1492 domain->agaw = agaw;
1493 INIT_LIST_HEAD(&domain->devices);
1494
8e604097
WH
1495 if (ecap_coherent(iommu->ecap))
1496 domain->iommu_coherency = 1;
1497 else
1498 domain->iommu_coherency = 0;
1499
58c610bd
SY
1500 if (ecap_sc_support(iommu->ecap))
1501 domain->iommu_snooping = 1;
1502 else
1503 domain->iommu_snooping = 0;
1504
6dd9a7c7 1505 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
c7151a8d 1506 domain->iommu_count = 1;
4c923d47 1507 domain->nid = iommu->node;
c7151a8d 1508
ba395927 1509 /* always allocate the top pgd */
4c923d47 1510 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1511 if (!domain->pgd)
1512 return -ENOMEM;
5b6985ce 1513 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1514 return 0;
1515}
1516
1517static void domain_exit(struct dmar_domain *domain)
1518{
2c2e2c38
FY
1519 struct dmar_drhd_unit *drhd;
1520 struct intel_iommu *iommu;
ba395927
KA
1521
 1522 /* Domain 0 is reserved, so don't process it */
1523 if (!domain)
1524 return;
1525
7b668357
AW
1526 /* Flush any lazy unmaps that may reference this domain */
1527 if (!intel_iommu_strict)
1528 flush_unmaps_timeout(0);
1529
ba395927
KA
1530 domain_remove_dev_info(domain);
1531 /* destroy iovas */
1532 put_iova_domain(&domain->iovad);
ba395927
KA
1533
1534 /* clear ptes */
595badf5 1535 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1536
1537 /* free page tables */
d794dc9b 1538 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1539
2c2e2c38 1540 for_each_active_iommu(iommu, drhd)
1b198bb0 1541 if (test_bit(iommu->seq_id, domain->iommu_bmp))
2c2e2c38
FY
1542 iommu_detach_domain(domain, iommu);
1543
ba395927
KA
1544 free_domain_mem(domain);
1545}
1546
4ed0d3e6
FY
1547static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1548 u8 bus, u8 devfn, int translation)
ba395927
KA
1549{
1550 struct context_entry *context;
ba395927 1551 unsigned long flags;
5331fe6f 1552 struct intel_iommu *iommu;
ea6606b0
WH
1553 struct dma_pte *pgd;
1554 unsigned long num;
1555 unsigned long ndomains;
1556 int id;
1557 int agaw;
93a23a72 1558 struct device_domain_info *info = NULL;
ba395927
KA
1559
1560 pr_debug("Set context mapping for %02x:%02x.%d\n",
1561 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1562
ba395927 1563 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1564 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1565 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1566
276dbf99 1567 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1568 if (!iommu)
1569 return -ENODEV;
1570
ba395927
KA
1571 context = device_to_context_entry(iommu, bus, devfn);
1572 if (!context)
1573 return -ENOMEM;
1574 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1575 if (context_present(context)) {
ba395927
KA
1576 spin_unlock_irqrestore(&iommu->lock, flags);
1577 return 0;
1578 }
1579
ea6606b0
WH
1580 id = domain->id;
1581 pgd = domain->pgd;
1582
2c2e2c38
FY
1583 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1584 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1585 int found = 0;
1586
1587 /* find an available domain id for this device in iommu */
1588 ndomains = cap_ndoms(iommu->cap);
a45946ab 1589 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1590 if (iommu->domains[num] == domain) {
1591 id = num;
1592 found = 1;
1593 break;
1594 }
ea6606b0
WH
1595 }
1596
1597 if (found == 0) {
1598 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1599 if (num >= ndomains) {
1600 spin_unlock_irqrestore(&iommu->lock, flags);
1601 printk(KERN_ERR "IOMMU: no free domain ids\n");
1602 return -EFAULT;
1603 }
1604
1605 set_bit(num, iommu->domain_ids);
1606 iommu->domains[num] = domain;
1607 id = num;
1608 }
1609
 1610 /* Skip top levels of page tables for
 1611 * iommus which have a smaller agaw than the default.
1672af11 1612 * Unnecessary for PT mode.
ea6606b0 1613 */
1672af11
CW
1614 if (translation != CONTEXT_TT_PASS_THROUGH) {
1615 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1616 pgd = phys_to_virt(dma_pte_addr(pgd));
1617 if (!dma_pte_present(pgd)) {
1618 spin_unlock_irqrestore(&iommu->lock, flags);
1619 return -ENOMEM;
1620 }
ea6606b0
WH
1621 }
1622 }
1623 }
1624
1625 context_set_domain_id(context, id);
4ed0d3e6 1626
93a23a72
YZ
1627 if (translation != CONTEXT_TT_PASS_THROUGH) {
1628 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1629 translation = info ? CONTEXT_TT_DEV_IOTLB :
1630 CONTEXT_TT_MULTI_LEVEL;
1631 }
4ed0d3e6
FY
1632 /*
1633 * In pass through mode, AW must be programmed to indicate the largest
1634 * AGAW value supported by hardware. And ASR is ignored by hardware.
1635 */
93a23a72 1636 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1637 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1638 else {
1639 context_set_address_root(context, virt_to_phys(pgd));
1640 context_set_address_width(context, iommu->agaw);
1641 }
4ed0d3e6
FY
1642
1643 context_set_translation_type(context, translation);
c07e7d21
MM
1644 context_set_fault_enable(context);
1645 context_set_present(context);
5331fe6f 1646 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1647
4c25a2c1
DW
1648 /*
 1649 * It's a non-present to present mapping. If the hardware doesn't cache
 1650 * non-present entries we only need to flush the write-buffer. If it
 1651 * _does_ cache non-present entries, then it does so in the special
1652 * domain #0, which we have to flush:
1653 */
1654 if (cap_caching_mode(iommu->cap)) {
1655 iommu->flush.flush_context(iommu, 0,
1656 (((u16)bus) << 8) | devfn,
1657 DMA_CCMD_MASK_NOBIT,
1658 DMA_CCMD_DEVICE_INVL);
82653633 1659 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1660 } else {
ba395927 1661 iommu_flush_write_buffer(iommu);
4c25a2c1 1662 }
93a23a72 1663 iommu_enable_dev_iotlb(info);
ba395927 1664 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1665
1666 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1667 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1668 domain->iommu_count++;
4c923d47
SS
1669 if (domain->iommu_count == 1)
1670 domain->nid = iommu->node;
58c610bd 1671 domain_update_iommu_cap(domain);
c7151a8d
WH
1672 }
1673 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1674 return 0;
1675}
1676
1677static int
4ed0d3e6
FY
1678domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1679 int translation)
ba395927
KA
1680{
1681 int ret;
1682 struct pci_dev *tmp, *parent;
1683
276dbf99 1684 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1685 pdev->bus->number, pdev->devfn,
1686 translation);
ba395927
KA
1687 if (ret)
1688 return ret;
1689
1690 /* dependent device mapping */
1691 tmp = pci_find_upstream_pcie_bridge(pdev);
1692 if (!tmp)
1693 return 0;
1694 /* Secondary interface's bus number and devfn 0 */
1695 parent = pdev->bus->self;
1696 while (parent != tmp) {
276dbf99
DW
1697 ret = domain_context_mapping_one(domain,
1698 pci_domain_nr(parent->bus),
1699 parent->bus->number,
4ed0d3e6 1700 parent->devfn, translation);
ba395927
KA
1701 if (ret)
1702 return ret;
1703 parent = parent->bus->self;
1704 }
45e829ea 1705 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1706 return domain_context_mapping_one(domain,
276dbf99 1707 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1708 tmp->subordinate->number, 0,
1709 translation);
ba395927
KA
1710 else /* this is a legacy PCI bridge */
1711 return domain_context_mapping_one(domain,
276dbf99
DW
1712 pci_domain_nr(tmp->bus),
1713 tmp->bus->number,
4ed0d3e6
FY
1714 tmp->devfn,
1715 translation);
ba395927
KA
1716}
1717
5331fe6f 1718static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1719{
1720 int ret;
1721 struct pci_dev *tmp, *parent;
5331fe6f
WH
1722 struct intel_iommu *iommu;
1723
276dbf99
DW
1724 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1725 pdev->devfn);
5331fe6f
WH
1726 if (!iommu)
1727 return -ENODEV;
ba395927 1728
276dbf99 1729 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1730 if (!ret)
1731 return ret;
1732 /* dependent device mapping */
1733 tmp = pci_find_upstream_pcie_bridge(pdev);
1734 if (!tmp)
1735 return ret;
1736 /* Secondary interface's bus number and devfn 0 */
1737 parent = pdev->bus->self;
1738 while (parent != tmp) {
8c11e798 1739 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1740 parent->devfn);
ba395927
KA
1741 if (!ret)
1742 return ret;
1743 parent = parent->bus->self;
1744 }
5f4d91a1 1745 if (pci_is_pcie(tmp))
276dbf99
DW
1746 return device_context_mapped(iommu, tmp->subordinate->number,
1747 0);
ba395927 1748 else
276dbf99
DW
1749 return device_context_mapped(iommu, tmp->bus->number,
1750 tmp->devfn);
ba395927
KA
1751}
1752
f532959b
FY
1753/* Returns a number of VTD pages, but aligned to MM page size */
1754static inline unsigned long aligned_nrpages(unsigned long host_addr,
1755 size_t size)
1756{
1757 host_addr &= ~PAGE_MASK;
1758 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1759}
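/*
 * Worked example (editor's note, assumes 4KiB pages): a buffer at host
 * address offset 0xff0 with size 0x20 straddles a page boundary, so
 * aligned_nrpages(0xff0, 0x20) == PAGE_ALIGN(0x1010) >> 12 == 2 VT-d
 * pages even though the size itself is far below one page.
 */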
1760
6dd9a7c7
YS
1761/* Return largest possible superpage level for a given mapping */
1762static inline int hardware_largepage_caps(struct dmar_domain *domain,
1763 unsigned long iov_pfn,
1764 unsigned long phy_pfn,
1765 unsigned long pages)
1766{
1767 int support, level = 1;
1768 unsigned long pfnmerge;
1769
1770 support = domain->iommu_superpage;
1771
1772 /* To use a large page, the virtual *and* physical addresses
1773 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1774 of them will mean we have to use smaller pages. So just
1775 merge them and check both at once. */
1776 pfnmerge = iov_pfn | phy_pfn;
1777
1778 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1779 pages >>= VTD_STRIDE_SHIFT;
1780 if (!pages)
1781 break;
1782 pfnmerge >>= VTD_STRIDE_SHIFT;
1783 level++;
1784 support--;
1785 }
1786 return level;
1787}
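/*
 * Worked example (editor's note): with domain->iommu_superpage == 1 (2MiB
 * pages supported), iov_pfn == 0x200, phy_pfn == 0x74200 and pages == 0x200,
 * pfnmerge has its low nine bits clear and pages covers a full 512-pfn
 * stride, so the function returns level 2 and a single 2MiB PTE can be
 * used; if either pfn had any low-order bits set it would return level 1.
 */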
1788
9051aa02
DW
1789static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1790 struct scatterlist *sg, unsigned long phys_pfn,
1791 unsigned long nr_pages, int prot)
e1605495
DW
1792{
1793 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1794 phys_addr_t uninitialized_var(pteval);
e1605495 1795 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1796 unsigned long sg_res;
6dd9a7c7
YS
1797 unsigned int largepage_lvl = 0;
1798 unsigned long lvl_pages = 0;
e1605495
DW
1799
1800 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1801
1802 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1803 return -EINVAL;
1804
1805 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1806
9051aa02
DW
1807 if (sg)
1808 sg_res = 0;
1809 else {
1810 sg_res = nr_pages + 1;
1811 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1812 }
1813
6dd9a7c7 1814 while (nr_pages > 0) {
c85994e4
DW
1815 uint64_t tmp;
1816
e1605495 1817 if (!sg_res) {
f532959b 1818 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1819 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1820 sg->dma_length = sg->length;
1821 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1822 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1823 }
6dd9a7c7 1824
e1605495 1825 if (!pte) {
6dd9a7c7
YS
1826 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1827
1828 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
e1605495
DW
1829 if (!pte)
1830 return -ENOMEM;
6dd9a7c7 1831 /* It is a large page */
6491d4d0 1832 if (largepage_lvl > 1) {
6dd9a7c7 1833 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
1834 /* Ensure that old small page tables are removed to make room
1835 for superpage, if they exist. */
1836 dma_pte_clear_range(domain, iov_pfn,
1837 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1838 dma_pte_free_pagetable(domain, iov_pfn,
1839 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1840 } else {
6dd9a7c7 1841 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 1842 }
6dd9a7c7 1843
e1605495
DW
1844 }
1845 /* We don't need a lock here; nobody else
1846 * touches the iova range
1847 */
7766a3fb 1848 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1849 if (tmp) {
1bf20f0d 1850 static int dumps = 5;
c85994e4
DW
1851 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1852 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1853 if (dumps) {
1854 dumps--;
1855 debug_dma_dump_mappings(NULL);
1856 }
1857 WARN_ON(1);
1858 }
6dd9a7c7
YS
1859
1860 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1861
1862 BUG_ON(nr_pages < lvl_pages);
1863 BUG_ON(sg_res < lvl_pages);
1864
1865 nr_pages -= lvl_pages;
1866 iov_pfn += lvl_pages;
1867 phys_pfn += lvl_pages;
1868 pteval += lvl_pages * VTD_PAGE_SIZE;
1869 sg_res -= lvl_pages;
1870
1871 /* If the next PTE would be the first in a new page, then we
1872 need to flush the cache on the entries we've just written.
1873 And then we'll need to recalculate 'pte', so clear it and
1874 let it get set again in the if (!pte) block above.
1875
1876 If we're done (!nr_pages) we need to flush the cache too.
1877
1878 Also if we've been setting superpages, we may need to
1879 recalculate 'pte' and switch back to smaller pages for the
1880 end of the mapping, if the trailing size is not enough to
1881 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 1882 pte++;
6dd9a7c7
YS
1883 if (!nr_pages || first_pte_in_page(pte) ||
1884 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
1885 domain_flush_cache(domain, first_pte,
1886 (void *)pte - (void *)first_pte);
1887 pte = NULL;
1888 }
6dd9a7c7
YS
1889
1890 if (!sg_res && nr_pages)
e1605495
DW
1891 sg = sg_next(sg);
1892 }
1893 return 0;
1894}
1895
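/*
 * Side note (sketch, not driver code): the "first in a new page" flush
 * condition above exploits the fact that a page-table page holds 512
 * 8-byte PTEs, so the batch [first_pte, pte) never spans two page-table
 * pages.  Assuming first_pte_in_page() is simply an alignment test on the
 * PTE address, it is equivalent to:
 */
static inline bool pte_starts_new_table_page(struct dma_pte *pte)
{
	/* offset of this entry inside its 4KiB page-table page is zero */
	return ((unsigned long)pte & (VTD_PAGE_SIZE - 1)) == 0;
}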
9051aa02
DW
1896static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1897 struct scatterlist *sg, unsigned long nr_pages,
1898 int prot)
ba395927 1899{
9051aa02
DW
1900 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1901}
6f6a00e4 1902
9051aa02
DW
1903static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1904 unsigned long phys_pfn, unsigned long nr_pages,
1905 int prot)
1906{
1907 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1908}
1909
c7151a8d 1910static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1911{
c7151a8d
WH
1912 if (!iommu)
1913 return;
8c11e798
WH
1914
1915 clear_context_table(iommu, bus, devfn);
1916 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1917 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1918 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1919}
1920
109b9b04
DW
1921static inline void unlink_domain_info(struct device_domain_info *info)
1922{
1923 assert_spin_locked(&device_domain_lock);
1924 list_del(&info->link);
1925 list_del(&info->global);
1926 if (info->dev)
1927 info->dev->dev.archdata.iommu = NULL;
1928}
1929
ba395927
KA
1930static void domain_remove_dev_info(struct dmar_domain *domain)
1931{
1932 struct device_domain_info *info;
1933 unsigned long flags;
c7151a8d 1934 struct intel_iommu *iommu;
ba395927
KA
1935
1936 spin_lock_irqsave(&device_domain_lock, flags);
1937 while (!list_empty(&domain->devices)) {
1938 info = list_entry(domain->devices.next,
1939 struct device_domain_info, link);
109b9b04 1940 unlink_domain_info(info);
ba395927
KA
1941 spin_unlock_irqrestore(&device_domain_lock, flags);
1942
93a23a72 1943 iommu_disable_dev_iotlb(info);
276dbf99 1944 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1945 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1946 free_devinfo_mem(info);
1947
1948 spin_lock_irqsave(&device_domain_lock, flags);
1949 }
1950 spin_unlock_irqrestore(&device_domain_lock, flags);
1951}
1952
1953/*
1954 * find_domain
358dd8ac 1955 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1956 */
38717946 1957static struct dmar_domain *
ba395927
KA
1958find_domain(struct pci_dev *pdev)
1959{
1960 struct device_domain_info *info;
1961
1962 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1963 info = pdev->dev.archdata.iommu;
ba395927
KA
1964 if (info)
1965 return info->domain;
1966 return NULL;
1967}
1968
ba395927
KA
1969/* domain is initialized */
1970static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1971{
1972 struct dmar_domain *domain, *found = NULL;
1973 struct intel_iommu *iommu;
1974 struct dmar_drhd_unit *drhd;
1975 struct device_domain_info *info, *tmp;
1976 struct pci_dev *dev_tmp;
1977 unsigned long flags;
1978 int bus = 0, devfn = 0;
276dbf99 1979 int segment;
2c2e2c38 1980 int ret;
ba395927
KA
1981
1982 domain = find_domain(pdev);
1983 if (domain)
1984 return domain;
1985
276dbf99
DW
1986 segment = pci_domain_nr(pdev->bus);
1987
ba395927
KA
1988 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1989 if (dev_tmp) {
5f4d91a1 1990 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1991 bus = dev_tmp->subordinate->number;
1992 devfn = 0;
1993 } else {
1994 bus = dev_tmp->bus->number;
1995 devfn = dev_tmp->devfn;
1996 }
1997 spin_lock_irqsave(&device_domain_lock, flags);
1998 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1999 if (info->segment == segment &&
2000 info->bus == bus && info->devfn == devfn) {
ba395927
KA
2001 found = info->domain;
2002 break;
2003 }
2004 }
2005 spin_unlock_irqrestore(&device_domain_lock, flags);
2006 /* pcie-pci bridge already has a domain, use it */
2007 if (found) {
2008 domain = found;
2009 goto found_domain;
2010 }
2011 }
2012
2c2e2c38
FY
2013 domain = alloc_domain();
2014 if (!domain)
2015 goto error;
2016
ba395927
KA
2017 /* Allocate new domain for the device */
2018 drhd = dmar_find_matched_drhd_unit(pdev);
2019 if (!drhd) {
2020 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2021 pci_name(pdev));
d2900bd6 2022 free_domain_mem(domain);
ba395927
KA
2023 return NULL;
2024 }
2025 iommu = drhd->iommu;
2026
2c2e2c38
FY
2027 ret = iommu_attach_domain(domain, iommu);
2028 if (ret) {
2fe9723d 2029 free_domain_mem(domain);
ba395927 2030 goto error;
2c2e2c38 2031 }
ba395927
KA
2032
2033 if (domain_init(domain, gaw)) {
2034 domain_exit(domain);
2035 goto error;
2036 }
2037
2038 /* register pcie-to-pci device */
2039 if (dev_tmp) {
2040 info = alloc_devinfo_mem();
2041 if (!info) {
2042 domain_exit(domain);
2043 goto error;
2044 }
276dbf99 2045 info->segment = segment;
ba395927
KA
2046 info->bus = bus;
2047 info->devfn = devfn;
2048 info->dev = NULL;
2049 info->domain = domain;
2050 /* This domain is shared by devices under p2p bridge */
3b5410e7 2051 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
2052
2053 /* pcie-to-pci bridge already has a domain, use it */
2054 found = NULL;
2055 spin_lock_irqsave(&device_domain_lock, flags);
2056 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
2057 if (tmp->segment == segment &&
2058 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
2059 found = tmp->domain;
2060 break;
2061 }
2062 }
2063 if (found) {
00dfff77 2064 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
2065 free_devinfo_mem(info);
2066 domain_exit(domain);
2067 domain = found;
2068 } else {
2069 list_add(&info->link, &domain->devices);
2070 list_add(&info->global, &device_domain_list);
00dfff77 2071 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2072 }
ba395927
KA
2073 }
2074
2075found_domain:
2076 info = alloc_devinfo_mem();
2077 if (!info)
2078 goto error;
276dbf99 2079 info->segment = segment;
ba395927
KA
2080 info->bus = pdev->bus->number;
2081 info->devfn = pdev->devfn;
2082 info->dev = pdev;
2083 info->domain = domain;
2084 spin_lock_irqsave(&device_domain_lock, flags);
2085 /* somebody else may have beaten us to it */
2086 found = find_domain(pdev);
2087 if (found != NULL) {
2088 spin_unlock_irqrestore(&device_domain_lock, flags);
2089 if (found != domain) {
2090 domain_exit(domain);
2091 domain = found;
2092 }
2093 free_devinfo_mem(info);
2094 return domain;
2095 }
2096 list_add(&info->link, &domain->devices);
2097 list_add(&info->global, &device_domain_list);
358dd8ac 2098 pdev->dev.archdata.iommu = info;
ba395927
KA
2099 spin_unlock_irqrestore(&device_domain_lock, flags);
2100 return domain;
2101error:
2102 /* recheck it here, maybe others set it */
2103 return find_domain(pdev);
2104}
2105
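/*
 * Sketch of the publication step used above (illustration only;
 * install_domain() is a hypothetical name): the domain is built without
 * holding device_domain_lock, so before it is published the code
 * re-checks under the lock whether another CPU already attached a domain
 * to this device, and discards the local copy if it lost the race.
 */
static struct dmar_domain *install_domain(struct pci_dev *pdev,
					  struct dmar_domain *domain,
					  struct device_domain_info *info)
{
	struct dmar_domain *found;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	found = find_domain(pdev);		/* did someone beat us to it? */
	if (found) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain)
			domain_exit(domain);	/* drop the losing copy */
		free_devinfo_mem(info);
		return found;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
}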
2c2e2c38 2106static int iommu_identity_mapping;
e0fc7e0b
DW
2107#define IDENTMAP_ALL 1
2108#define IDENTMAP_GFX 2
2109#define IDENTMAP_AZALIA 4
2c2e2c38 2110
b213203e
DW
2111static int iommu_domain_identity_map(struct dmar_domain *domain,
2112 unsigned long long start,
2113 unsigned long long end)
ba395927 2114{
c5395d5c
DW
2115 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2116 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2117
2118 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2119 dma_to_mm_pfn(last_vpfn))) {
ba395927 2120 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2121 return -ENOMEM;
ba395927
KA
2122 }
2123
c5395d5c
DW
2124 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2125 start, end, domain->id);
ba395927
KA
2126 /*
2127 * RMRR range might have overlap with physical memory range,
2128 * clear it first
2129 */
c5395d5c 2130 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2131
c5395d5c
DW
2132 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2133 last_vpfn - first_vpfn + 1,
61df7443 2134 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2135}
2136
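/*
 * Worked example (illustration only): for an RMRR spanning
 * 0xbf800000 - 0xbfffffff the conversion above gives
 *
 *	first_vpfn = 0xbf800000 >> 12 = 0xbf800
 *	last_vpfn  = 0xbfffffff >> 12 = 0xbffff
 *	pages      = last_vpfn - first_vpfn + 1 = 0x800 (2048 pages, 8MiB)
 *
 * The range is reserved in the domain's iova allocator first, so the DMA
 * API can never hand those addresses out again, any stale PTEs are
 * cleared, and then a 1:1 read/write mapping is installed for exactly
 * those 0x800 pages.
 */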
2137static int iommu_prepare_identity_map(struct pci_dev *pdev,
2138 unsigned long long start,
2139 unsigned long long end)
2140{
2141 struct dmar_domain *domain;
2142 int ret;
2143
c7ab48d2 2144 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2145 if (!domain)
2146 return -ENOMEM;
2147
19943b0e
DW
2148 /* For _hardware_ passthrough, don't bother. But for software
2149 passthrough, we do it anyway -- it may indicate a memory
2150 range which is reserved in E820 and therefore didn't get set
2151 up in si_domain to start with */
2152 if (domain == si_domain && hw_pass_through) {
2153 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2154 pci_name(pdev), start, end);
2155 return 0;
2156 }
2157
2158 printk(KERN_INFO
2159 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2160 pci_name(pdev), start, end);
2ff729f5 2161
5595b528
DW
2162 if (end < start) {
2163 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2164 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2165 dmi_get_system_info(DMI_BIOS_VENDOR),
2166 dmi_get_system_info(DMI_BIOS_VERSION),
2167 dmi_get_system_info(DMI_PRODUCT_VERSION));
2168 ret = -EIO;
2169 goto error;
2170 }
2171
2ff729f5
DW
2172 if (end >> agaw_to_width(domain->agaw)) {
2173 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2174 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2175 agaw_to_width(domain->agaw),
2176 dmi_get_system_info(DMI_BIOS_VENDOR),
2177 dmi_get_system_info(DMI_BIOS_VERSION),
2178 dmi_get_system_info(DMI_PRODUCT_VERSION));
2179 ret = -EIO;
2180 goto error;
2181 }
19943b0e 2182
b213203e 2183 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2184 if (ret)
2185 goto error;
2186
2187 /* context entry init */
4ed0d3e6 2188 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2189 if (ret)
2190 goto error;
2191
2192 return 0;
2193
2194 error:
ba395927
KA
2195 domain_exit(domain);
2196 return ret;
ba395927
KA
2197}
2198
2199static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2200 struct pci_dev *pdev)
2201{
358dd8ac 2202 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2203 return 0;
2204 return iommu_prepare_identity_map(pdev, rmrr->base_address,
70e535d1 2205 rmrr->end_address);
ba395927
KA
2206}
2207
d3f13810 2208#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2209static inline void iommu_prepare_isa(void)
2210{
2211 struct pci_dev *pdev;
2212 int ret;
2213
2214 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2215 if (!pdev)
2216 return;
2217
c7ab48d2 2218 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
70e535d1 2219 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
49a0429e
KA
2220
2221 if (ret)
c7ab48d2
DW
2222 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2223 "floppy might not work\n");
49a0429e
KA
2224
2225}
2226#else
2227static inline void iommu_prepare_isa(void)
2228{
2229 return;
2230}
d3f13810 2231#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2232
2c2e2c38 2233static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2234
071e1374 2235static int __init si_domain_init(int hw)
2c2e2c38
FY
2236{
2237 struct dmar_drhd_unit *drhd;
2238 struct intel_iommu *iommu;
c7ab48d2 2239 int nid, ret = 0;
2c2e2c38
FY
2240
2241 si_domain = alloc_domain();
2242 if (!si_domain)
2243 return -EFAULT;
2244
c7ab48d2 2245 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2246
2247 for_each_active_iommu(iommu, drhd) {
2248 ret = iommu_attach_domain(si_domain, iommu);
2249 if (ret) {
2250 domain_exit(si_domain);
2251 return -EFAULT;
2252 }
2253 }
2254
2255 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2256 domain_exit(si_domain);
2257 return -EFAULT;
2258 }
2259
2260 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2261
19943b0e
DW
2262 if (hw)
2263 return 0;
2264
c7ab48d2 2265 for_each_online_node(nid) {
5dfe8660
TH
2266 unsigned long start_pfn, end_pfn;
2267 int i;
2268
2269 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2270 ret = iommu_domain_identity_map(si_domain,
2271 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2272 if (ret)
2273 return ret;
2274 }
c7ab48d2
DW
2275 }
2276
2c2e2c38
FY
2277 return 0;
2278}
2279
2280static void domain_remove_one_dev_info(struct dmar_domain *domain,
2281 struct pci_dev *pdev);
2282static int identity_mapping(struct pci_dev *pdev)
2283{
2284 struct device_domain_info *info;
2285
2286 if (likely(!iommu_identity_mapping))
2287 return 0;
2288
cb452a40
MT
2289 info = pdev->dev.archdata.iommu;
2290 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2291 return (info->domain == si_domain);
2c2e2c38 2292
2c2e2c38
FY
2293 return 0;
2294}
2295
2296static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2297 struct pci_dev *pdev,
2298 int translation)
2c2e2c38
FY
2299{
2300 struct device_domain_info *info;
2301 unsigned long flags;
5fe60f4e 2302 int ret;
2c2e2c38
FY
2303
2304 info = alloc_devinfo_mem();
2305 if (!info)
2306 return -ENOMEM;
2307
2308 info->segment = pci_domain_nr(pdev->bus);
2309 info->bus = pdev->bus->number;
2310 info->devfn = pdev->devfn;
2311 info->dev = pdev;
2312 info->domain = domain;
2313
2314 spin_lock_irqsave(&device_domain_lock, flags);
2315 list_add(&info->link, &domain->devices);
2316 list_add(&info->global, &device_domain_list);
2317 pdev->dev.archdata.iommu = info;
2318 spin_unlock_irqrestore(&device_domain_lock, flags);
2319
e2ad23d0
DW
2320 ret = domain_context_mapping(domain, pdev, translation);
2321 if (ret) {
2322 spin_lock_irqsave(&device_domain_lock, flags);
109b9b04 2323 unlink_domain_info(info);
e2ad23d0
DW
2324 spin_unlock_irqrestore(&device_domain_lock, flags);
2325 free_devinfo_mem(info);
2326 return ret;
2327 }
2328
2c2e2c38
FY
2329 return 0;
2330}
2331
ea2447f7
TM
2332static bool device_has_rmrr(struct pci_dev *dev)
2333{
2334 struct dmar_rmrr_unit *rmrr;
2335 int i;
2336
2337 for_each_rmrr_units(rmrr) {
2338 for (i = 0; i < rmrr->devices_cnt; i++) {
2339 /*
2340 * Return TRUE if this RMRR contains the device that
2341 * is passed in.
2342 */
2343 if (rmrr->devices[i] == dev)
2344 return true;
2345 }
2346 }
2347 return false;
2348}
2349
6941af28
DW
2350static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2351{
ea2447f7
TM
2352
2353 /*
2354 * We want to prevent any device associated with an RMRR from
2355 * getting placed into the SI Domain. This is done because
2356 * problems exist when devices are moved in and out of domains
2357 * and their respective RMRR info is lost. We exempt USB devices
2358 * from this process due to their usage of RMRRs that are known
2359 * to not be needed after BIOS hand-off to OS.
2360 */
2361 if (device_has_rmrr(pdev) &&
2362 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2363 return 0;
2364
e0fc7e0b
DW
2365 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2366 return 1;
2367
2368 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2369 return 1;
2370
2371 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2372 return 0;
6941af28 2373
3dfc813d
DW
2374 /*
2375 * We want to start off with all devices in the 1:1 domain, and
2376 * take them out later if we find they can't access all of memory.
2377 *
2378 * However, we can't do this for PCI devices behind bridges,
2379 * because all PCI devices behind the same bridge will end up
2380 * with the same source-id on their transactions.
2381 *
2382 * Practically speaking, we can't change things around for these
2383 * devices at run-time, because we can't be sure there'll be no
2384 * DMA transactions in flight for any of their siblings.
2385 *
2386 * So PCI devices (unless they're on the root bus) as well as
2387 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2388 * the 1:1 domain, just in _case_ one of their siblings turns out
2389 * not to be able to map all of memory.
2390 */
5f4d91a1 2391 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2392 if (!pci_is_root_bus(pdev->bus))
2393 return 0;
2394 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2395 return 0;
62f87c0e 2396 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d
DW
2397 return 0;
2398
2399 /*
2400 * At boot time, we don't yet know if devices will be 64-bit capable.
2401 * Assume that they will -- if they turn out not to be, then we can
2402 * take them out of the 1:1 domain later.
2403 */
8fcc5372
CW
2404 if (!startup) {
2405 /*
2406 * If the device's dma_mask is less than the system's memory
2407 * size then this is not a candidate for identity mapping.
2408 */
2409 u64 dma_mask = pdev->dma_mask;
2410
2411 if (pdev->dev.coherent_dma_mask &&
2412 pdev->dev.coherent_dma_mask < dma_mask)
2413 dma_mask = pdev->dev.coherent_dma_mask;
2414
2415 return dma_mask >= dma_get_required_mask(&pdev->dev);
2416 }
6941af28
DW
2417
2418 return 1;
2419}
2420
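/*
 * Illustration (not driver code): the late (!startup) test above keeps a
 * device identity-mapped only if its DMA mask covers everything the
 * platform may ask it to address.  On a hypothetical host where
 * dma_get_required_mask() reports a 36-bit mask, a device limited to
 * DMA_BIT_MASK(32) fails the comparison and is later moved out of
 * si_domain into a private domain, while a 64-bit capable device stays
 * identity-mapped.  The effective mask is the smaller of dma_mask and
 * coherent_dma_mask, exactly as computed above.
 */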
071e1374 2421static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2422{
2c2e2c38
FY
2423 struct pci_dev *pdev = NULL;
2424 int ret;
2425
19943b0e 2426 ret = si_domain_init(hw);
2c2e2c38
FY
2427 if (ret)
2428 return -EFAULT;
2429
2c2e2c38 2430 for_each_pci_dev(pdev) {
6941af28 2431 if (iommu_should_identity_map(pdev, 1)) {
5fe60f4e 2432 ret = domain_add_dev_info(si_domain, pdev,
eae460b6
MT
2433 hw ? CONTEXT_TT_PASS_THROUGH :
2434 CONTEXT_TT_MULTI_LEVEL);
2435 if (ret) {
2436 /* device not associated with an iommu */
2437 if (ret == -ENODEV)
2438 continue;
62edf5dc 2439 return ret;
eae460b6
MT
2440 }
2441 pr_info("IOMMU: %s identity mapping for device %s\n",
2442 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2443 }
2c2e2c38
FY
2444 }
2445
2446 return 0;
2447}
2448
b779260b 2449static int __init init_dmars(void)
ba395927
KA
2450{
2451 struct dmar_drhd_unit *drhd;
2452 struct dmar_rmrr_unit *rmrr;
2453 struct pci_dev *pdev;
2454 struct intel_iommu *iommu;
9d783ba0 2455 int i, ret;
2c2e2c38 2456
ba395927
KA
2457 /*
2458 * for each drhd
2459 * allocate root
2460 * initialize and program root entry to not present
2461 * endfor
2462 */
2463 for_each_drhd_unit(drhd) {
5e0d2a6f 2464 /*
2465 * lock not needed as this is only incremented in the single
2466 * threaded kernel __init code path all other access are read
2467 * only
2468 */
1b198bb0
MT
2469 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2470 g_num_of_iommus++;
2471 continue;
2472 }
2473 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2474 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2475 }
2476
d9630fe9
WH
2477 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2478 GFP_KERNEL);
2479 if (!g_iommus) {
2480 printk(KERN_ERR "Allocating global iommu array failed\n");
2481 ret = -ENOMEM;
2482 goto error;
2483 }
2484
80b20dd8 2485 deferred_flush = kzalloc(g_num_of_iommus *
2486 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2487 if (!deferred_flush) {
5e0d2a6f 2488 ret = -ENOMEM;
2489 goto error;
2490 }
2491
5e0d2a6f 2492 for_each_drhd_unit(drhd) {
2493 if (drhd->ignored)
2494 continue;
1886e8a9
SS
2495
2496 iommu = drhd->iommu;
d9630fe9 2497 g_iommus[iommu->seq_id] = iommu;
ba395927 2498
e61d98d8
SS
2499 ret = iommu_init_domains(iommu);
2500 if (ret)
2501 goto error;
2502
ba395927
KA
2503 /*
2504 * TBD:
2505 * we could share the same root & context tables
25985edc 2506 * among all IOMMUs. Need to split it later.
ba395927
KA
2507 */
2508 ret = iommu_alloc_root_entry(iommu);
2509 if (ret) {
2510 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2511 goto error;
2512 }
4ed0d3e6 2513 if (!ecap_pass_through(iommu->ecap))
19943b0e 2514 hw_pass_through = 0;
ba395927
KA
2515 }
2516
1531a6a6
SS
2517 /*
2518 * Start from the sane iommu hardware state.
2519 */
a77b67d4
YS
2520 for_each_drhd_unit(drhd) {
2521 if (drhd->ignored)
2522 continue;
2523
2524 iommu = drhd->iommu;
1531a6a6
SS
2525
2526 /*
2527 * If the queued invalidation is already initialized by us
2528 * (for example, while enabling interrupt-remapping) then
2529 * we got the things already rolling from a sane state.
2530 */
2531 if (iommu->qi)
2532 continue;
2533
2534 /*
2535 * Clear any previous faults.
2536 */
2537 dmar_fault(-1, iommu);
2538 /*
2539 * Disable queued invalidation if supported and already enabled
2540 * before OS handover.
2541 */
2542 dmar_disable_qi(iommu);
2543 }
2544
2545 for_each_drhd_unit(drhd) {
2546 if (drhd->ignored)
2547 continue;
2548
2549 iommu = drhd->iommu;
2550
a77b67d4
YS
2551 if (dmar_enable_qi(iommu)) {
2552 /*
2553 * Queued Invalidate not enabled, use Register Based
2554 * Invalidate
2555 */
2556 iommu->flush.flush_context = __iommu_flush_context;
2557 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2558 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2559 "invalidation\n",
680a7524 2560 iommu->seq_id,
b4e0f9eb 2561 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2562 } else {
2563 iommu->flush.flush_context = qi_flush_context;
2564 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2565 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2566 "invalidation\n",
680a7524 2567 iommu->seq_id,
b4e0f9eb 2568 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2569 }
2570 }
2571
19943b0e 2572 if (iommu_pass_through)
e0fc7e0b
DW
2573 iommu_identity_mapping |= IDENTMAP_ALL;
2574
d3f13810 2575#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2576 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2577#endif
e0fc7e0b
DW
2578
2579 check_tylersburg_isoch();
2580
ba395927 2581 /*
19943b0e
DW
2582 * If pass through is not set or not enabled, setup context entries for
2583 * identity mappings for rmrr, gfx, and isa and may fall back to static
2584 * identity mapping if iommu_identity_mapping is set.
ba395927 2585 */
19943b0e
DW
2586 if (iommu_identity_mapping) {
2587 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2588 if (ret) {
19943b0e
DW
2589 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2590 goto error;
ba395927
KA
2591 }
2592 }
ba395927 2593 /*
19943b0e
DW
2594 * For each rmrr
2595 * for each dev attached to rmrr
2596 * do
2597 * locate drhd for dev, alloc domain for dev
2598 * allocate free domain
2599 * allocate page table entries for rmrr
2600 * if context not allocated for bus
2601 * allocate and init context
2602 * set present in root table for this bus
2603 * init context with domain, translation etc
2604 * endfor
2605 * endfor
ba395927 2606 */
19943b0e
DW
2607 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2608 for_each_rmrr_units(rmrr) {
2609 for (i = 0; i < rmrr->devices_cnt; i++) {
2610 pdev = rmrr->devices[i];
2611 /*
2612 * some BIOSes list non-existent devices in the
2613 * DMAR table.
2614 */
2615 if (!pdev)
2616 continue;
2617 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2618 if (ret)
2619 printk(KERN_ERR
2620 "IOMMU: mapping reserved region failed\n");
ba395927 2621 }
4ed0d3e6 2622 }
49a0429e 2623
19943b0e
DW
2624 iommu_prepare_isa();
2625
ba395927
KA
2626 /*
2627 * for each drhd
2628 * enable fault log
2629 * global invalidate context cache
2630 * global invalidate iotlb
2631 * enable translation
2632 */
2633 for_each_drhd_unit(drhd) {
51a63e67
JC
2634 if (drhd->ignored) {
2635 /*
2636 * we always have to disable PMRs or DMA may fail on
2637 * this device
2638 */
2639 if (force_on)
2640 iommu_disable_protect_mem_regions(drhd->iommu);
ba395927 2641 continue;
51a63e67 2642 }
ba395927 2643 iommu = drhd->iommu;
ba395927
KA
2644
2645 iommu_flush_write_buffer(iommu);
2646
3460a6d9
KA
2647 ret = dmar_set_interrupt(iommu);
2648 if (ret)
2649 goto error;
2650
ba395927
KA
2651 iommu_set_root_entry(iommu);
2652
4c25a2c1 2653 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2654 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2655
ba395927
KA
2656 ret = iommu_enable_translation(iommu);
2657 if (ret)
2658 goto error;
b94996c9
DW
2659
2660 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2661 }
2662
2663 return 0;
2664error:
2665 for_each_drhd_unit(drhd) {
2666 if (drhd->ignored)
2667 continue;
2668 iommu = drhd->iommu;
2669 free_iommu(iommu);
2670 }
d9630fe9 2671 kfree(g_iommus);
ba395927
KA
2672 return ret;
2673}
2674
5a5e02a6 2675/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2676static struct iova *intel_alloc_iova(struct device *dev,
2677 struct dmar_domain *domain,
2678 unsigned long nrpages, uint64_t dma_mask)
ba395927 2679{
ba395927 2680 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2681 struct iova *iova = NULL;
ba395927 2682
875764de
DW
2683 /* Restrict dma_mask to the width that the iommu can handle */
2684 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2685
2686 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2687 /*
2688 * First try to allocate an io virtual address in
284901a9 2689 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2690 * from higher range
ba395927 2691 */
875764de
DW
2692 iova = alloc_iova(&domain->iovad, nrpages,
2693 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2694 if (iova)
2695 return iova;
2696 }
2697 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2698 if (unlikely(!iova)) {
2699 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2700 nrpages, pci_name(pdev));
f76aec76
KA
2701 return NULL;
2702 }
2703
2704 return iova;
2705}
2706
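/*
 * Usage sketch (values are hypothetical): callers hand this function a
 * count of MM pages, typically derived from a byte length the same way
 * __intel_map_single() below does it:
 *
 *	size = aligned_nrpages(paddr, size);		   VT-d pages
 *	iova = intel_alloc_iova(hwdev, domain,
 *				dma_to_mm_pfn(size),	   MM pages
 *				pdev->dma_mask);
 *
 * Note that even a device advertising a 64-bit dma_mask gets its first
 * allocation attempt below 4GiB; only if that region is exhausted (or the
 * forcedac option is set) does the allocation move to the higher range.
 */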
147202aa 2707static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2708{
2709 struct dmar_domain *domain;
2710 int ret;
2711
2712 domain = get_domain_for_dev(pdev,
2713 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2714 if (!domain) {
2715 printk(KERN_ERR
2716 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2717 return NULL;
ba395927
KA
2718 }
2719
2720 /* make sure context mapping is ok */
5331fe6f 2721 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2722 ret = domain_context_mapping(domain, pdev,
2723 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2724 if (ret) {
2725 printk(KERN_ERR
2726 "Domain context map for %s failed",
2727 pci_name(pdev));
4fe05bbc 2728 return NULL;
f76aec76 2729 }
ba395927
KA
2730 }
2731
f76aec76
KA
2732 return domain;
2733}
2734
147202aa
DW
2735static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2736{
2737 struct device_domain_info *info;
2738
2739 /* No lock here, assumes no domain exit in normal case */
2740 info = dev->dev.archdata.iommu;
2741 if (likely(info))
2742 return info->domain;
2743
2744 return __get_valid_domain_for_dev(dev);
2745}
2746
2c2e2c38
FY
2747static int iommu_dummy(struct pci_dev *pdev)
2748{
2749 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2750}
2751
2752/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2753static int iommu_no_mapping(struct device *dev)
2c2e2c38 2754{
73676832 2755 struct pci_dev *pdev;
2c2e2c38
FY
2756 int found;
2757
73676832
DW
2758 if (unlikely(dev->bus != &pci_bus_type))
2759 return 1;
2760
2761 pdev = to_pci_dev(dev);
1e4c64c4
DW
2762 if (iommu_dummy(pdev))
2763 return 1;
2764
2c2e2c38 2765 if (!iommu_identity_mapping)
1e4c64c4 2766 return 0;
2c2e2c38
FY
2767
2768 found = identity_mapping(pdev);
2769 if (found) {
6941af28 2770 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2771 return 1;
2772 else {
2773 /*
2774 * 32 bit DMA is removed from si_domain and fall back
2775 * to non-identity mapping.
2776 */
2777 domain_remove_one_dev_info(si_domain, pdev);
2778 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2779 pci_name(pdev));
2780 return 0;
2781 }
2782 } else {
2783 /*
2784 * In case of a detached 64 bit DMA device from vm, the device
2785 * is put into si_domain for identity mapping.
2786 */
6941af28 2787 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2788 int ret;
5fe60f4e
DW
2789 ret = domain_add_dev_info(si_domain, pdev,
2790 hw_pass_through ?
2791 CONTEXT_TT_PASS_THROUGH :
2792 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2793 if (!ret) {
2794 printk(KERN_INFO "64bit %s uses identity mapping\n",
2795 pci_name(pdev));
2796 return 1;
2797 }
2798 }
2799 }
2800
1e4c64c4 2801 return 0;
2c2e2c38
FY
2802}
2803
bb9e6d65
FT
2804static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2805 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2806{
2807 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2808 struct dmar_domain *domain;
5b6985ce 2809 phys_addr_t start_paddr;
f76aec76
KA
2810 struct iova *iova;
2811 int prot = 0;
6865f0d1 2812 int ret;
8c11e798 2813 struct intel_iommu *iommu;
33041ec0 2814 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2815
2816 BUG_ON(dir == DMA_NONE);
2c2e2c38 2817
73676832 2818 if (iommu_no_mapping(hwdev))
6865f0d1 2819 return paddr;
f76aec76
KA
2820
2821 domain = get_valid_domain_for_dev(pdev);
2822 if (!domain)
2823 return 0;
2824
8c11e798 2825 iommu = domain_get_iommu(domain);
88cb6a74 2826 size = aligned_nrpages(paddr, size);
f76aec76 2827
c681d0ba 2828 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
2829 if (!iova)
2830 goto error;
2831
ba395927
KA
2832 /*
2833 * Check if DMAR supports zero-length reads on write only
2834 * mappings..
2835 */
2836 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2837 !cap_zlr(iommu->cap))
ba395927
KA
2838 prot |= DMA_PTE_READ;
2839 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2840 prot |= DMA_PTE_WRITE;
2841 /*
6865f0d1 2842 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2843 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2844 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2845 * is not a big problem
2846 */
0ab36de2 2847 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2848 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2849 if (ret)
2850 goto error;
2851
1f0ef2aa
DW
2852 /* it's a non-present to present mapping. Only flush if caching mode */
2853 if (cap_caching_mode(iommu->cap))
82653633 2854 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2855 else
8c11e798 2856 iommu_flush_write_buffer(iommu);
f76aec76 2857
03d6a246
DW
2858 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2859 start_paddr += paddr & ~PAGE_MASK;
2860 return start_paddr;
ba395927 2861
ba395927 2862error:
f76aec76
KA
2863 if (iova)
2864 __free_iova(&domain->iovad, iova);
4cf2e75d 2865 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2866 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2867 return 0;
2868}
2869
ffbbef5c
FT
2870static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2871 unsigned long offset, size_t size,
2872 enum dma_data_direction dir,
2873 struct dma_attrs *attrs)
bb9e6d65 2874{
ffbbef5c
FT
2875 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2876 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2877}
2878
5e0d2a6f 2879static void flush_unmaps(void)
2880{
80b20dd8 2881 int i, j;
5e0d2a6f 2882
5e0d2a6f 2883 timer_on = 0;
2884
2885 /* just flush them all */
2886 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2887 struct intel_iommu *iommu = g_iommus[i];
2888 if (!iommu)
2889 continue;
c42d9f32 2890
9dd2fe89
YZ
2891 if (!deferred_flush[i].next)
2892 continue;
2893
78d5f0f5
NA
2894 /* In caching mode, global flushes make emulation expensive */
2895 if (!cap_caching_mode(iommu->cap))
2896 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2897 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2898 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2899 unsigned long mask;
2900 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2901 struct dmar_domain *domain = deferred_flush[i].domain[j];
2902
2903 /* On real hardware multiple invalidations are expensive */
2904 if (cap_caching_mode(iommu->cap))
2905 iommu_flush_iotlb_psi(iommu, domain->id,
2906 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2907 else {
2908 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2909 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2910 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2911 }
93a23a72 2912 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2913 }
9dd2fe89 2914 deferred_flush[i].next = 0;
5e0d2a6f 2915 }
2916
5e0d2a6f 2917 list_size = 0;
5e0d2a6f 2918}
2919
2920static void flush_unmaps_timeout(unsigned long data)
2921{
80b20dd8 2922 unsigned long flags;
2923
2924 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2925 flush_unmaps();
80b20dd8 2926 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2927}
2928
2929static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2930{
2931 unsigned long flags;
80b20dd8 2932 int next, iommu_id;
8c11e798 2933 struct intel_iommu *iommu;
5e0d2a6f 2934
2935 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2936 if (list_size == HIGH_WATER_MARK)
2937 flush_unmaps();
2938
8c11e798
WH
2939 iommu = domain_get_iommu(dom);
2940 iommu_id = iommu->seq_id;
c42d9f32 2941
80b20dd8 2942 next = deferred_flush[iommu_id].next;
2943 deferred_flush[iommu_id].domain[next] = dom;
2944 deferred_flush[iommu_id].iova[next] = iova;
2945 deferred_flush[iommu_id].next++;
5e0d2a6f 2946
2947 if (!timer_on) {
2948 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2949 timer_on = 1;
2950 }
2951 list_size++;
2952 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2953}
2954
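/*
 * Summary sketch of the deferred-unmap machinery above (illustration
 * only): instead of flushing the IOTLB on every unmap, entries are parked
 * per-IOMMU in deferred_flush[] and the IOVAs are only returned to the
 * allocator once the batch is flushed -- either when HIGH_WATER_MARK
 * entries have piled up or when the 10ms unmap_timer fires:
 *
 *	add_unmap(dom, iova)
 *	    deferred_flush[iommu->seq_id] records (dom, iova)
 *	    mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10))
 *	flush_unmaps()
 *	    one global flush per IOMMU (or per-range flushes in caching
 *	    mode, where global flushes are expensive to emulate)
 *	    __free_iova() for every parked entry
 *
 * The IOVA must stay reserved until the flush, because the hardware may
 * still hold a stale translation for it.
 */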
ffbbef5c
FT
2955static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2956 size_t size, enum dma_data_direction dir,
2957 struct dma_attrs *attrs)
ba395927 2958{
ba395927 2959 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2960 struct dmar_domain *domain;
d794dc9b 2961 unsigned long start_pfn, last_pfn;
ba395927 2962 struct iova *iova;
8c11e798 2963 struct intel_iommu *iommu;
ba395927 2964
73676832 2965 if (iommu_no_mapping(dev))
f76aec76 2966 return;
2c2e2c38 2967
ba395927
KA
2968 domain = find_domain(pdev);
2969 BUG_ON(!domain);
2970
8c11e798
WH
2971 iommu = domain_get_iommu(domain);
2972
ba395927 2973 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2974 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2975 (unsigned long long)dev_addr))
ba395927 2976 return;
ba395927 2977
d794dc9b
DW
2978 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2979 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2980
d794dc9b
DW
2981 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2982 pci_name(pdev), start_pfn, last_pfn);
ba395927 2983
f76aec76 2984 /* clear the whole page */
d794dc9b
DW
2985 dma_pte_clear_range(domain, start_pfn, last_pfn);
2986
f76aec76 2987 /* free page tables */
d794dc9b
DW
2988 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2989
5e0d2a6f 2990 if (intel_iommu_strict) {
03d6a246 2991 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2992 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2993 /* free iova */
2994 __free_iova(&domain->iovad, iova);
2995 } else {
2996 add_unmap(domain, iova);
2997 /*
2998 * queue up the release of the unmap to save roughly 1/6th of the
2999 * CPU time otherwise spent in the iotlb flush operation...
3000 */
5e0d2a6f 3001 }
ba395927
KA
3002}
3003
d7ab5c46 3004static void *intel_alloc_coherent(struct device *hwdev, size_t size,
baa676fc
AP
3005 dma_addr_t *dma_handle, gfp_t flags,
3006 struct dma_attrs *attrs)
ba395927
KA
3007{
3008 void *vaddr;
3009 int order;
3010
5b6985ce 3011 size = PAGE_ALIGN(size);
ba395927 3012 order = get_order(size);
e8bb910d
AW
3013
3014 if (!iommu_no_mapping(hwdev))
3015 flags &= ~(GFP_DMA | GFP_DMA32);
3016 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3017 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3018 flags |= GFP_DMA;
3019 else
3020 flags |= GFP_DMA32;
3021 }
ba395927
KA
3022
3023 vaddr = (void *)__get_free_pages(flags, order);
3024 if (!vaddr)
3025 return NULL;
3026 memset(vaddr, 0, size);
3027
bb9e6d65
FT
3028 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3029 DMA_BIDIRECTIONAL,
3030 hwdev->coherent_dma_mask);
ba395927
KA
3031 if (*dma_handle)
3032 return vaddr;
3033 free_pages((unsigned long)vaddr, order);
3034 return NULL;
3035}
3036
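/*
 * Illustration (not driver code): the GFP adjustment above only matters
 * when the buffer will bypass IOMMU translation, because then the device
 * sees the raw physical address.  With a hypothetical 24-bit
 * coherent_dma_mask the allocation is forced into ZONE_DMA (GFP_DMA);
 * with a mask of at least 32 bits that is still smaller than
 * dma_get_required_mask() it is kept below 4GiB via GFP_DMA32; a device
 * whose DMA is translated needs neither, since any page can be remapped
 * into its reachable range, so those flags are stripped.
 */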
d7ab5c46 3037static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
baa676fc 3038 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3039{
3040 int order;
3041
5b6985ce 3042 size = PAGE_ALIGN(size);
ba395927
KA
3043 order = get_order(size);
3044
0db9b7ae 3045 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
3046 free_pages((unsigned long)vaddr, order);
3047}
3048
d7ab5c46
FT
3049static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3050 int nelems, enum dma_data_direction dir,
3051 struct dma_attrs *attrs)
ba395927 3052{
ba395927
KA
3053 struct pci_dev *pdev = to_pci_dev(hwdev);
3054 struct dmar_domain *domain;
d794dc9b 3055 unsigned long start_pfn, last_pfn;
f76aec76 3056 struct iova *iova;
8c11e798 3057 struct intel_iommu *iommu;
ba395927 3058
73676832 3059 if (iommu_no_mapping(hwdev))
ba395927
KA
3060 return;
3061
3062 domain = find_domain(pdev);
8c11e798
WH
3063 BUG_ON(!domain);
3064
3065 iommu = domain_get_iommu(domain);
ba395927 3066
c03ab37c 3067 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3068 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3069 (unsigned long long)sglist[0].dma_address))
f76aec76 3070 return;
f76aec76 3071
d794dc9b
DW
3072 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3073 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
3074
3075 /* clear the whole page */
d794dc9b
DW
3076 dma_pte_clear_range(domain, start_pfn, last_pfn);
3077
f76aec76 3078 /* free page tables */
d794dc9b 3079 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 3080
acea0018
DW
3081 if (intel_iommu_strict) {
3082 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 3083 last_pfn - start_pfn + 1, 0);
acea0018
DW
3084 /* free iova */
3085 __free_iova(&domain->iovad, iova);
3086 } else {
3087 add_unmap(domain, iova);
3088 /*
3089 * queue up the release of the unmap to save roughly 1/6th of the
3090 * CPU time otherwise spent in the iotlb flush operation...
3091 */
3092 }
ba395927
KA
3093}
3094
ba395927 3095static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3096 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3097{
3098 int i;
c03ab37c 3099 struct scatterlist *sg;
ba395927 3100
c03ab37c 3101 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3102 BUG_ON(!sg_page(sg));
4cf2e75d 3103 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3104 sg->dma_length = sg->length;
ba395927
KA
3105 }
3106 return nelems;
3107}
3108
d7ab5c46
FT
3109static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3110 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3111{
ba395927 3112 int i;
ba395927
KA
3113 struct pci_dev *pdev = to_pci_dev(hwdev);
3114 struct dmar_domain *domain;
f76aec76
KA
3115 size_t size = 0;
3116 int prot = 0;
f76aec76
KA
3117 struct iova *iova = NULL;
3118 int ret;
c03ab37c 3119 struct scatterlist *sg;
b536d24d 3120 unsigned long start_vpfn;
8c11e798 3121 struct intel_iommu *iommu;
ba395927
KA
3122
3123 BUG_ON(dir == DMA_NONE);
73676832 3124 if (iommu_no_mapping(hwdev))
c03ab37c 3125 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 3126
f76aec76
KA
3127 domain = get_valid_domain_for_dev(pdev);
3128 if (!domain)
3129 return 0;
3130
8c11e798
WH
3131 iommu = domain_get_iommu(domain);
3132
b536d24d 3133 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3134 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3135
5a5e02a6
DW
3136 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3137 pdev->dma_mask);
f76aec76 3138 if (!iova) {
c03ab37c 3139 sglist->dma_length = 0;
f76aec76
KA
3140 return 0;
3141 }
3142
3143 /*
3144 * Check if DMAR supports zero-length reads on write only
3145 * mappings..
3146 */
3147 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3148 !cap_zlr(iommu->cap))
f76aec76
KA
3149 prot |= DMA_PTE_READ;
3150 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3151 prot |= DMA_PTE_WRITE;
3152
b536d24d 3153 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3154
f532959b 3155 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3156 if (unlikely(ret)) {
3157 /* clear the page */
3158 dma_pte_clear_range(domain, start_vpfn,
3159 start_vpfn + size - 1);
3160 /* free page tables */
3161 dma_pte_free_pagetable(domain, start_vpfn,
3162 start_vpfn + size - 1);
3163 /* free iova */
3164 __free_iova(&domain->iovad, iova);
3165 return 0;
ba395927
KA
3166 }
3167
1f0ef2aa
DW
3168 /* it's a non-present to present mapping. Only flush if caching mode */
3169 if (cap_caching_mode(iommu->cap))
82653633 3170 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 3171 else
8c11e798 3172 iommu_flush_write_buffer(iommu);
1f0ef2aa 3173
ba395927
KA
3174 return nelems;
3175}
3176
dfb805e8
FT
3177static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3178{
3179 return !dma_addr;
3180}
3181
160c1d8e 3182struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3183 .alloc = intel_alloc_coherent,
3184 .free = intel_free_coherent,
ba395927
KA
3185 .map_sg = intel_map_sg,
3186 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3187 .map_page = intel_map_page,
3188 .unmap_page = intel_unmap_page,
dfb805e8 3189 .mapping_error = intel_mapping_error,
ba395927
KA
3190};
3191
3192static inline int iommu_domain_cache_init(void)
3193{
3194 int ret = 0;
3195
3196 iommu_domain_cache = kmem_cache_create("iommu_domain",
3197 sizeof(struct dmar_domain),
3198 0,
3199 SLAB_HWCACHE_ALIGN,
3200
3201 NULL);
3202 if (!iommu_domain_cache) {
3203 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3204 ret = -ENOMEM;
3205 }
3206
3207 return ret;
3208}
3209
3210static inline int iommu_devinfo_cache_init(void)
3211{
3212 int ret = 0;
3213
3214 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3215 sizeof(struct device_domain_info),
3216 0,
3217 SLAB_HWCACHE_ALIGN,
ba395927
KA
3218 NULL);
3219 if (!iommu_devinfo_cache) {
3220 printk(KERN_ERR "Couldn't create devinfo cache\n");
3221 ret = -ENOMEM;
3222 }
3223
3224 return ret;
3225}
3226
3227static inline int iommu_iova_cache_init(void)
3228{
3229 int ret = 0;
3230
3231 iommu_iova_cache = kmem_cache_create("iommu_iova",
3232 sizeof(struct iova),
3233 0,
3234 SLAB_HWCACHE_ALIGN,
ba395927
KA
3235 NULL);
3236 if (!iommu_iova_cache) {
3237 printk(KERN_ERR "Couldn't create iova cache\n");
3238 ret = -ENOMEM;
3239 }
3240
3241 return ret;
3242}
3243
3244static int __init iommu_init_mempool(void)
3245{
3246 int ret;
3247 ret = iommu_iova_cache_init();
3248 if (ret)
3249 return ret;
3250
3251 ret = iommu_domain_cache_init();
3252 if (ret)
3253 goto domain_error;
3254
3255 ret = iommu_devinfo_cache_init();
3256 if (!ret)
3257 return ret;
3258
3259 kmem_cache_destroy(iommu_domain_cache);
3260domain_error:
3261 kmem_cache_destroy(iommu_iova_cache);
3262
3263 return -ENOMEM;
3264}
3265
3266static void __init iommu_exit_mempool(void)
3267{
3268 kmem_cache_destroy(iommu_devinfo_cache);
3269 kmem_cache_destroy(iommu_domain_cache);
3270 kmem_cache_destroy(iommu_iova_cache);
3271
3272}
3273
556ab45f
DW
3274static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3275{
3276 struct dmar_drhd_unit *drhd;
3277 u32 vtbar;
3278 int rc;
3279
3280 /* We know that this device on this chipset has its own IOMMU.
3281 * If we find it under a different IOMMU, then the BIOS is lying
3282 * to us. Hope that the IOMMU for this device is actually
3283 * disabled, and it needs no translation...
3284 */
3285 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3286 if (rc) {
3287 /* "can't" happen */
3288 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3289 return;
3290 }
3291 vtbar &= 0xffff0000;
3292
3293 /* we know that this iommu should be at offset 0xa000 from vtbar */
3294 drhd = dmar_find_matched_drhd_unit(pdev);
3295 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3296 TAINT_FIRMWARE_WORKAROUND,
3297 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3298 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3299}
3300DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3301
ba395927
KA
3302static void __init init_no_remapping_devices(void)
3303{
3304 struct dmar_drhd_unit *drhd;
3305
3306 for_each_drhd_unit(drhd) {
3307 if (!drhd->include_all) {
3308 int i;
3309 for (i = 0; i < drhd->devices_cnt; i++)
3310 if (drhd->devices[i] != NULL)
3311 break;
3312 /* ignore DMAR unit if no pci devices exist */
3313 if (i == drhd->devices_cnt)
3314 drhd->ignored = 1;
3315 }
3316 }
3317
ba395927
KA
3318 for_each_drhd_unit(drhd) {
3319 int i;
3320 if (drhd->ignored || drhd->include_all)
3321 continue;
3322
3323 for (i = 0; i < drhd->devices_cnt; i++)
3324 if (drhd->devices[i] &&
c0771df8 3325 !IS_GFX_DEVICE(drhd->devices[i]))
ba395927
KA
3326 break;
3327
3328 if (i < drhd->devices_cnt)
3329 continue;
3330
c0771df8
DW
3331 /* This IOMMU has *only* gfx devices. Either bypass it or
3332 set the gfx_mapped flag, as appropriate */
3333 if (dmar_map_gfx) {
3334 intel_iommu_gfx_mapped = 1;
3335 } else {
3336 drhd->ignored = 1;
3337 for (i = 0; i < drhd->devices_cnt; i++) {
3338 if (!drhd->devices[i])
3339 continue;
3340 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3341 }
ba395927
KA
3342 }
3343 }
3344}
3345
f59c7b69
FY
3346#ifdef CONFIG_SUSPEND
3347static int init_iommu_hw(void)
3348{
3349 struct dmar_drhd_unit *drhd;
3350 struct intel_iommu *iommu = NULL;
3351
3352 for_each_active_iommu(iommu, drhd)
3353 if (iommu->qi)
3354 dmar_reenable_qi(iommu);
3355
b779260b
JC
3356 for_each_iommu(iommu, drhd) {
3357 if (drhd->ignored) {
3358 /*
3359 * we always have to disable PMRs or DMA may fail on
3360 * this device
3361 */
3362 if (force_on)
3363 iommu_disable_protect_mem_regions(iommu);
3364 continue;
3365 }
3366
f59c7b69
FY
3367 iommu_flush_write_buffer(iommu);
3368
3369 iommu_set_root_entry(iommu);
3370
3371 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3372 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3373 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3374 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3375 if (iommu_enable_translation(iommu))
3376 return 1;
b94996c9 3377 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3378 }
3379
3380 return 0;
3381}
3382
3383static void iommu_flush_all(void)
3384{
3385 struct dmar_drhd_unit *drhd;
3386 struct intel_iommu *iommu;
3387
3388 for_each_active_iommu(iommu, drhd) {
3389 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3390 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3391 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3392 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3393 }
3394}
3395
134fac3f 3396static int iommu_suspend(void)
f59c7b69
FY
3397{
3398 struct dmar_drhd_unit *drhd;
3399 struct intel_iommu *iommu = NULL;
3400 unsigned long flag;
3401
3402 for_each_active_iommu(iommu, drhd) {
3403 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3404 GFP_ATOMIC);
3405 if (!iommu->iommu_state)
3406 goto nomem;
3407 }
3408
3409 iommu_flush_all();
3410
3411 for_each_active_iommu(iommu, drhd) {
3412 iommu_disable_translation(iommu);
3413
1f5b3c3f 3414 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3415
3416 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3417 readl(iommu->reg + DMAR_FECTL_REG);
3418 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3419 readl(iommu->reg + DMAR_FEDATA_REG);
3420 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3421 readl(iommu->reg + DMAR_FEADDR_REG);
3422 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3423 readl(iommu->reg + DMAR_FEUADDR_REG);
3424
1f5b3c3f 3425 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3426 }
3427 return 0;
3428
3429nomem:
3430 for_each_active_iommu(iommu, drhd)
3431 kfree(iommu->iommu_state);
3432
3433 return -ENOMEM;
3434}
3435
134fac3f 3436static void iommu_resume(void)
f59c7b69
FY
3437{
3438 struct dmar_drhd_unit *drhd;
3439 struct intel_iommu *iommu = NULL;
3440 unsigned long flag;
3441
3442 if (init_iommu_hw()) {
b779260b
JC
3443 if (force_on)
3444 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3445 else
3446 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3447 return;
f59c7b69
FY
3448 }
3449
3450 for_each_active_iommu(iommu, drhd) {
3451
1f5b3c3f 3452 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3453
3454 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3455 iommu->reg + DMAR_FECTL_REG);
3456 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3457 iommu->reg + DMAR_FEDATA_REG);
3458 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3459 iommu->reg + DMAR_FEADDR_REG);
3460 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3461 iommu->reg + DMAR_FEUADDR_REG);
3462
1f5b3c3f 3463 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3464 }
3465
3466 for_each_active_iommu(iommu, drhd)
3467 kfree(iommu->iommu_state);
f59c7b69
FY
3468}
3469
134fac3f 3470static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3471 .resume = iommu_resume,
3472 .suspend = iommu_suspend,
3473};
3474
134fac3f 3475static void __init init_iommu_pm_ops(void)
f59c7b69 3476{
134fac3f 3477 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3478}
3479
3480#else
99592ba4 3481static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3482#endif /* CONFIG_PM */
3483
318fe7df
SS
3484LIST_HEAD(dmar_rmrr_units);
3485
3486static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3487{
3488 list_add(&rmrr->list, &dmar_rmrr_units);
3489}
3490
3491
3492int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3493{
3494 struct acpi_dmar_reserved_memory *rmrr;
3495 struct dmar_rmrr_unit *rmrru;
3496
3497 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3498 if (!rmrru)
3499 return -ENOMEM;
3500
3501 rmrru->hdr = header;
3502 rmrr = (struct acpi_dmar_reserved_memory *)header;
3503 rmrru->base_address = rmrr->base_address;
3504 rmrru->end_address = rmrr->end_address;
3505
3506 dmar_register_rmrr_unit(rmrru);
3507 return 0;
3508}
3509
3510static int __init
3511rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3512{
3513 struct acpi_dmar_reserved_memory *rmrr;
3514 int ret;
3515
3516 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3517 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3518 ((void *)rmrr) + rmrr->header.length,
3519 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3520
3521 if (ret || (rmrru->devices_cnt == 0)) {
3522 list_del(&rmrru->list);
3523 kfree(rmrru);
3524 }
3525 return ret;
3526}
3527
3528static LIST_HEAD(dmar_atsr_units);
3529
3530int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3531{
3532 struct acpi_dmar_atsr *atsr;
3533 struct dmar_atsr_unit *atsru;
3534
3535 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3536 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3537 if (!atsru)
3538 return -ENOMEM;
3539
3540 atsru->hdr = hdr;
3541 atsru->include_all = atsr->flags & 0x1;
3542
3543 list_add(&atsru->list, &dmar_atsr_units);
3544
3545 return 0;
3546}
3547
3548static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3549{
3550 int rc;
3551 struct acpi_dmar_atsr *atsr;
3552
3553 if (atsru->include_all)
3554 return 0;
3555
3556 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3557 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3558 (void *)atsr + atsr->header.length,
3559 &atsru->devices_cnt, &atsru->devices,
3560 atsr->segment);
3561 if (rc || !atsru->devices_cnt) {
3562 list_del(&atsru->list);
3563 kfree(atsru);
3564 }
3565
3566 return rc;
3567}
3568
3569int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3570{
3571 int i;
3572 struct pci_bus *bus;
3573 struct acpi_dmar_atsr *atsr;
3574 struct dmar_atsr_unit *atsru;
3575
3576 dev = pci_physfn(dev);
3577
3578 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3579 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3580 if (atsr->segment == pci_domain_nr(dev->bus))
3581 goto found;
3582 }
3583
3584 return 0;
3585
3586found:
3587 for (bus = dev->bus; bus; bus = bus->parent) {
3588 struct pci_dev *bridge = bus->self;
3589
3590 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3591 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df
SS
3592 return 0;
3593
62f87c0e 3594 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
318fe7df
SS
3595 for (i = 0; i < atsru->devices_cnt; i++)
3596 if (atsru->devices[i] == bridge)
3597 return 1;
3598 break;
3599 }
3600 }
3601
3602 if (atsru->include_all)
3603 return 1;
3604
3605 return 0;
3606}
3607
c8f369ab 3608int __init dmar_parse_rmrr_atsr_dev(void)
318fe7df
SS
3609{
3610 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3611 struct dmar_atsr_unit *atsr, *atsr_n;
3612 int ret = 0;
3613
3614 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3615 ret = rmrr_parse_dev(rmrr);
3616 if (ret)
3617 return ret;
3618 }
3619
3620 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3621 ret = atsr_parse_dev(atsr);
3622 if (ret)
3623 return ret;
3624 }
3625
3626 return ret;
3627}
3628
99dcaded
FY
3629/*
3630 * Here we only respond to action of unbound device from driver.
3631 *
3632 * Added device is not attached to its DMAR domain here yet. That will happen
3633 * when mapping the device to iova.
3634 */
3635static int device_notifier(struct notifier_block *nb,
3636 unsigned long action, void *data)
3637{
3638 struct device *dev = data;
3639 struct pci_dev *pdev = to_pci_dev(dev);
3640 struct dmar_domain *domain;
3641
44cd613c
DW
3642 if (iommu_no_mapping(dev))
3643 return 0;
3644
99dcaded
FY
3645 domain = find_domain(pdev);
3646 if (!domain)
3647 return 0;
3648
a97590e5 3649 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
99dcaded
FY
3650 domain_remove_one_dev_info(domain, pdev);
3651
a97590e5
AW
3652 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3653 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3654 list_empty(&domain->devices))
3655 domain_exit(domain);
3656 }
3657
99dcaded
FY
3658 return 0;
3659}
3660
3661static struct notifier_block device_nb = {
3662 .notifier_call = device_notifier,
3663};
3664
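/*
 * Main VT-d initialisation: parse the DMAR table and device scopes, set up
 * the per-iommu state via init_dmars(), switch dma_ops to the Intel IOMMU
 * implementation, and register the IOMMU API ops and the bus notifier that
 * cleans up per-device state when a driver is unbound.
 */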
ba395927
KA
3665int __init intel_iommu_init(void)
3666{
3667 int ret = 0;
3668
a59b50e9
JC
3669 /* VT-d is required for a TXT/tboot launch, so enforce that */
3670 force_on = tboot_force_iommu();
3671
3672 if (dmar_table_init()) {
3673 if (force_on)
3674 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3675 return -ENODEV;
a59b50e9 3676 }
ba395927 3677
c2c7286a 3678 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3679 if (force_on)
3680 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3681 return -ENODEV;
a59b50e9 3682 }
1886e8a9 3683
75f1cdf1 3684 if (no_iommu || dmar_disabled)
2ae21010
SS
3685 return -ENODEV;
3686
51a63e67
JC
3687 if (iommu_init_mempool()) {
3688 if (force_on)
3689 panic("tboot: Failed to initialize iommu memory\n");
3690 return -ENODEV;
3691 }
3692
318fe7df
SS
3693 if (list_empty(&dmar_rmrr_units))
3694 printk(KERN_INFO "DMAR: No RMRR found\n");
3695
3696 if (list_empty(&dmar_atsr_units))
3697 printk(KERN_INFO "DMAR: No ATSR found\n");
3698
51a63e67
JC
3699 if (dmar_init_reserved_ranges()) {
3700 if (force_on)
3701 panic("tboot: Failed to reserve iommu ranges\n");
3702 return -ENODEV;
3703 }
ba395927
KA
3704
3705 init_no_remapping_devices();
3706
b779260b 3707 ret = init_dmars();
ba395927 3708 if (ret) {
a59b50e9
JC
3709 if (force_on)
3710 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3711 printk(KERN_ERR "IOMMU: dmar init failed\n");
3712 put_iova_domain(&reserved_iova_list);
3713 iommu_exit_mempool();
3714 return ret;
3715 }
3716 printk(KERN_INFO
3717 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3718
5e0d2a6f 3719 init_timer(&unmap_timer);
75f1cdf1
FT
3720#ifdef CONFIG_SWIOTLB
3721 swiotlb = 0;
3722#endif
19943b0e 3723 dma_ops = &intel_dma_ops;
4ed0d3e6 3724
134fac3f 3725 init_iommu_pm_ops();
a8bcbb0d 3726
4236d97d 3727 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
a8bcbb0d 3728
99dcaded
FY
3729 bus_register_notifier(&pci_bus_type, &device_nb);
3730
8bc1f85c
ED
3731 intel_iommu_enabled = 1;
3732
ba395927
KA
3733 return 0;
3734}
e820482c 3735
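/*
 * Devices behind a PCIe-to-PCI bridge issue DMA with the bridge's IDs, so
 * the context entries programmed for the bridges upstream of the device
 * have to be torn down together with the device's own entry.
 */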
3199aa6b
HW
3736static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3737 struct pci_dev *pdev)
3738{
3739 struct pci_dev *tmp, *parent;
3740
3741 if (!iommu || !pdev)
3742 return;
3743
3744 /* dependent device detach */
3745 tmp = pci_find_upstream_pcie_bridge(pdev);
3746 /* Secondary interface's bus number and devfn 0 */
3747 if (tmp) {
3748 parent = pdev->bus->self;
3749 while (parent != tmp) {
3750 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3751 parent->devfn);
3199aa6b
HW
3752 parent = parent->bus->self;
3753 }
45e829ea 3754 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3755 iommu_detach_dev(iommu,
3756 tmp->subordinate->number, 0);
3757 else /* this is a legacy PCI bridge */
276dbf99
DW
3758 iommu_detach_dev(iommu, tmp->bus->number,
3759 tmp->devfn);
3199aa6b
HW
3760 }
3761}
3762
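/*
 * Detach one PCI device from @domain: unlink its device_domain_info, disable
 * its device IOTLB, and clear its context entry (and those of any dependent
 * bridges).  If no other device on the same IOMMU remains in the domain, the
 * IOMMU is cleared from the domain's bitmap and, for non-VM/non-SI domains,
 * the domain id is released on that IOMMU.
 */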
2c2e2c38 3763static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3764 struct pci_dev *pdev)
3765{
3766 struct device_domain_info *info;
3767 struct intel_iommu *iommu;
3768 unsigned long flags;
3769 int found = 0;
3770 struct list_head *entry, *tmp;
3771
276dbf99
DW
3772 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3773 pdev->devfn);
c7151a8d
WH
3774 if (!iommu)
3775 return;
3776
3777 spin_lock_irqsave(&device_domain_lock, flags);
3778 list_for_each_safe(entry, tmp, &domain->devices) {
3779 info = list_entry(entry, struct device_domain_info, link);
8519dc44
MH
3780 if (info->segment == pci_domain_nr(pdev->bus) &&
3781 info->bus == pdev->bus->number &&
c7151a8d 3782 info->devfn == pdev->devfn) {
109b9b04 3783 unlink_domain_info(info);
c7151a8d
WH
3784 spin_unlock_irqrestore(&device_domain_lock, flags);
3785
93a23a72 3786 iommu_disable_dev_iotlb(info);
c7151a8d 3787 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3788 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3789 free_devinfo_mem(info);
3790
3791 spin_lock_irqsave(&device_domain_lock, flags);
3792
3793 if (found)
3794 break;
3795 else
3796 continue;
3797 }
3798
 3799 /* if there are no other devices under the same iommu
 3800 * owned by this domain, clear this iommu in iommu_bmp,
 3801 * and update the iommu count and coherency
 3802 */
276dbf99
DW
3803 if (iommu == device_to_iommu(info->segment, info->bus,
3804 info->devfn))
c7151a8d
WH
3805 found = 1;
3806 }
3807
3e7abe25
RD
3808 spin_unlock_irqrestore(&device_domain_lock, flags);
3809
c7151a8d
WH
3810 if (found == 0) {
3811 unsigned long tmp_flags;
3812 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 3813 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 3814 domain->iommu_count--;
58c610bd 3815 domain_update_iommu_cap(domain);
c7151a8d 3816 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 3817
9b4554b2
AW
3818 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3819 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3820 spin_lock_irqsave(&iommu->lock, tmp_flags);
3821 clear_bit(domain->id, iommu->domain_ids);
3822 iommu->domains[domain->id] = NULL;
3823 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3824 }
c7151a8d 3825 }
c7151a8d
WH
3826}
3827
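/*
 * Detach every device from a virtual-machine domain, clearing context
 * entries and per-IOMMU bookkeeping as each one goes away.
 */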
3828static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3829{
3830 struct device_domain_info *info;
3831 struct intel_iommu *iommu;
3832 unsigned long flags1, flags2;
3833
3834 spin_lock_irqsave(&device_domain_lock, flags1);
3835 while (!list_empty(&domain->devices)) {
3836 info = list_entry(domain->devices.next,
3837 struct device_domain_info, link);
109b9b04 3838 unlink_domain_info(info);
c7151a8d
WH
3839 spin_unlock_irqrestore(&device_domain_lock, flags1);
3840
93a23a72 3841 iommu_disable_dev_iotlb(info);
276dbf99 3842 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3843 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3844 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3845
3846 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3847 * and capabilities
c7151a8d
WH
3848 */
3849 spin_lock_irqsave(&domain->iommu_lock, flags2);
3850 if (test_and_clear_bit(iommu->seq_id,
1b198bb0 3851 domain->iommu_bmp)) {
c7151a8d 3852 domain->iommu_count--;
58c610bd 3853 domain_update_iommu_cap(domain);
c7151a8d
WH
3854 }
3855 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3856
3857 free_devinfo_mem(info);
3858 spin_lock_irqsave(&device_domain_lock, flags1);
3859 }
3860 spin_unlock_irqrestore(&device_domain_lock, flags1);
3861}
3862
5e98c4b1
WH
 3863/* domain id for a virtual machine; it is never written into a context entry */
3864static unsigned long vm_domid;
3865
3866static struct dmar_domain *iommu_alloc_vm_domain(void)
3867{
3868 struct dmar_domain *domain;
3869
3870 domain = alloc_domain_mem();
3871 if (!domain)
3872 return NULL;
3873
3874 domain->id = vm_domid++;
4c923d47 3875 domain->nid = -1;
1b198bb0 3876 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
5e98c4b1
WH
3877 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3878
3879 return domain;
3880}
3881
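/*
 * Basic set-up for a domain that is not yet tied to any IOMMU: initialise
 * the iova allocator, reserve the special ranges, derive the AGAW from the
 * requested guest width and allocate the top-level page directory.
 */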
2c2e2c38 3882static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3883{
3884 int adjust_width;
3885
3886 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3887 spin_lock_init(&domain->iommu_lock);
3888
3889 domain_reserve_special_ranges(domain);
3890
3891 /* calculate AGAW */
3892 domain->gaw = guest_width;
3893 adjust_width = guestwidth_to_adjustwidth(guest_width);
3894 domain->agaw = width_to_agaw(adjust_width);
3895
3896 INIT_LIST_HEAD(&domain->devices);
3897
3898 domain->iommu_count = 0;
3899 domain->iommu_coherency = 0;
c5b15255 3900 domain->iommu_snooping = 0;
6dd9a7c7 3901 domain->iommu_superpage = 0;
fe40f1e0 3902 domain->max_addr = 0;
4c923d47 3903 domain->nid = -1;
5e98c4b1
WH
3904
3905 /* always allocate the top pgd */
4c923d47 3906 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3907 if (!domain->pgd)
3908 return -ENOMEM;
3909 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3910 return 0;
3911}
3912
3913static void iommu_free_vm_domain(struct dmar_domain *domain)
3914{
3915 unsigned long flags;
3916 struct dmar_drhd_unit *drhd;
3917 struct intel_iommu *iommu;
3918 unsigned long i;
3919 unsigned long ndomains;
3920
3921 for_each_drhd_unit(drhd) {
3922 if (drhd->ignored)
3923 continue;
3924 iommu = drhd->iommu;
3925
3926 ndomains = cap_ndoms(iommu->cap);
a45946ab 3927 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3928 if (iommu->domains[i] == domain) {
3929 spin_lock_irqsave(&iommu->lock, flags);
3930 clear_bit(i, iommu->domain_ids);
3931 iommu->domains[i] = NULL;
3932 spin_unlock_irqrestore(&iommu->lock, flags);
3933 break;
3934 }
5e98c4b1
WH
3935 }
3936 }
3937}
3938
3939static void vm_domain_exit(struct dmar_domain *domain)
3940{
5e98c4b1
WH
 3941 /* Domain 0 is reserved, so don't process it */
3942 if (!domain)
3943 return;
3944
3945 vm_domain_remove_all_dev_info(domain);
3946 /* destroy iovas */
3947 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3948
3949 /* clear ptes */
595badf5 3950 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3951
3952 /* free page tables */
d794dc9b 3953 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3954
3955 iommu_free_vm_domain(domain);
3956 free_domain_mem(domain);
3957}
3958
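/*
 * IOMMU API entry point: back the generic iommu_domain with a freshly
 * allocated virtual-machine dmar_domain sized to the default address width,
 * and describe the usable aperture to the core.
 */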
5d450806 3959static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3960{
5d450806 3961 struct dmar_domain *dmar_domain;
38717946 3962
5d450806
JR
3963 dmar_domain = iommu_alloc_vm_domain();
3964 if (!dmar_domain) {
38717946 3965 printk(KERN_ERR
5d450806
JR
3966 "intel_iommu_domain_init: dmar_domain == NULL\n");
3967 return -ENOMEM;
38717946 3968 }
2c2e2c38 3969 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3970 printk(KERN_ERR
5d450806
JR
3971 "intel_iommu_domain_init() failed\n");
3972 vm_domain_exit(dmar_domain);
3973 return -ENOMEM;
38717946 3974 }
8140a95d 3975 domain_update_iommu_cap(dmar_domain);
5d450806 3976 domain->priv = dmar_domain;
faa3d6f5 3977
8a0e715b
JR
3978 domain->geometry.aperture_start = 0;
3979 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3980 domain->geometry.force_aperture = true;
3981
5d450806 3982 return 0;
38717946 3983}
38717946 3984
5d450806 3985static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3986{
5d450806
JR
3987 struct dmar_domain *dmar_domain = domain->priv;
3988
3989 domain->priv = NULL;
3990 vm_domain_exit(dmar_domain);
38717946 3991}
38717946 3992
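/*
 * Attach @dev to an IOMMU API domain.  Any existing mapping of the device is
 * removed first, the domain's address width is clamped to what the IOMMU can
 * translate (cap_mgaw), surplus page-table levels are stripped, and the
 * device is then added with multi-level translation.
 */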
4c5478c9
JR
3993static int intel_iommu_attach_device(struct iommu_domain *domain,
3994 struct device *dev)
38717946 3995{
4c5478c9
JR
3996 struct dmar_domain *dmar_domain = domain->priv;
3997 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3998 struct intel_iommu *iommu;
3999 int addr_width;
faa3d6f5
WH
4000
4001 /* normally pdev is not mapped */
4002 if (unlikely(domain_context_mapped(pdev))) {
4003 struct dmar_domain *old_domain;
4004
4005 old_domain = find_domain(pdev);
4006 if (old_domain) {
2c2e2c38
FY
4007 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4008 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4009 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
4010 else
4011 domain_remove_dev_info(old_domain);
4012 }
4013 }
4014
276dbf99
DW
4015 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4016 pdev->devfn);
fe40f1e0
WH
4017 if (!iommu)
4018 return -ENODEV;
4019
4020 /* check if this iommu agaw is sufficient for max mapped address */
4021 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4022 if (addr_width > cap_mgaw(iommu->cap))
4023 addr_width = cap_mgaw(iommu->cap);
4024
4025 if (dmar_domain->max_addr > (1LL << addr_width)) {
4026 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4027 "sufficient for the mapped address (%llx)\n",
a99c47a2 4028 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4029 return -EFAULT;
4030 }
a99c47a2
TL
4031 dmar_domain->gaw = addr_width;
4032
4033 /*
4034 * Knock out extra levels of page tables if necessary
4035 */
4036 while (iommu->agaw < dmar_domain->agaw) {
4037 struct dma_pte *pte;
4038
4039 pte = dmar_domain->pgd;
4040 if (dma_pte_present(pte)) {
25cbff16
SY
4041 dmar_domain->pgd = (struct dma_pte *)
4042 phys_to_virt(dma_pte_addr(pte));
7a661013 4043 free_pgtable_page(pte);
a99c47a2
TL
4044 }
4045 dmar_domain->agaw--;
4046 }
fe40f1e0 4047
5fe60f4e 4048 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 4049}
38717946 4050
4c5478c9
JR
4051static void intel_iommu_detach_device(struct iommu_domain *domain,
4052 struct device *dev)
38717946 4053{
4c5478c9
JR
4054 struct dmar_domain *dmar_domain = domain->priv;
4055 struct pci_dev *pdev = to_pci_dev(dev);
4056
2c2e2c38 4057 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 4058}
c7151a8d 4059
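/*
 * Map [iova, iova + size) to hpa for an IOMMU API domain: translate the
 * IOMMU_* protection flags into DMA PTE bits, reject mappings beyond the
 * domain's guest address width, and install the PTEs for the whole pfn range.
 */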
b146a1c9
JR
4060static int intel_iommu_map(struct iommu_domain *domain,
4061 unsigned long iova, phys_addr_t hpa,
5009065d 4062 size_t size, int iommu_prot)
faa3d6f5 4063{
dde57a21 4064 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4065 u64 max_addr;
dde57a21 4066 int prot = 0;
faa3d6f5 4067 int ret;
fe40f1e0 4068
dde57a21
JR
4069 if (iommu_prot & IOMMU_READ)
4070 prot |= DMA_PTE_READ;
4071 if (iommu_prot & IOMMU_WRITE)
4072 prot |= DMA_PTE_WRITE;
9cf06697
SY
4073 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4074 prot |= DMA_PTE_SNP;
dde57a21 4075
163cc52c 4076 max_addr = iova + size;
dde57a21 4077 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4078 u64 end;
4079
4080 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4081 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4082 if (end < max_addr) {
8954da1f 4083 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4084 "sufficient for the mapped address (%llx)\n",
8954da1f 4085 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4086 return -EFAULT;
4087 }
dde57a21 4088 dmar_domain->max_addr = max_addr;
fe40f1e0 4089 }
ad051221
DW
4090 /* Round up size to next multiple of PAGE_SIZE, if it and
4091 the low bits of hpa would take us onto the next page */
88cb6a74 4092 size = aligned_nrpages(hpa, size);
ad051221
DW
4093 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4094 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4095 return ret;
38717946 4096}
38717946 4097
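/*
 * Unmap an IOMMU API range: clear the PTEs covering it and report back the
 * size that was actually torn down (PAGE_SIZE << order).
 */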
5009065d
OBC
4098static size_t intel_iommu_unmap(struct iommu_domain *domain,
4099 unsigned long iova, size_t size)
38717946 4100{
dde57a21 4101 struct dmar_domain *dmar_domain = domain->priv;
292827cb 4102 int order;
4b99d352 4103
292827cb 4104 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
163cc52c 4105 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 4106
163cc52c
DW
4107 if (dmar_domain->max_addr == iova + size)
4108 dmar_domain->max_addr = iova;
b146a1c9 4109
5009065d 4110 return PAGE_SIZE << order;
38717946 4111}
38717946 4112
d14d6577
JR
4113static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4114 unsigned long iova)
38717946 4115{
d14d6577 4116 struct dmar_domain *dmar_domain = domain->priv;
38717946 4117 struct dma_pte *pte;
faa3d6f5 4118 u64 phys = 0;
38717946 4119
6dd9a7c7 4120 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
38717946 4121 if (pte)
faa3d6f5 4122 phys = dma_pte_addr(pte);
38717946 4123
faa3d6f5 4124 return phys;
38717946 4125}
a8bcbb0d 4126
dbb9fd86
SY
4127static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4128 unsigned long cap)
4129{
4130 struct dmar_domain *dmar_domain = domain->priv;
4131
4132 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4133 return dmar_domain->iommu_snooping;
323f99cb 4134 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4135 return irq_remapping_enabled;
dbb9fd86
SY
4136
4137 return 0;
4138}
4139
783f157b 4140static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
70ae6f0d 4141{
783f157b
AW
4142 pci_dev_put(*from);
4143 *from = to;
4144}
70ae6f0d 4145
783f157b 4146#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
70ae6f0d 4147
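/*
 * Work out which device actually issues DMA on behalf of @dev (accounting
 * for PCIe-to-PCI bridges, DMA source quirks and missing ACS isolation on
 * multifunction devices or on the path to the root bus) and put @dev into
 * that device's iommu_group, allocating the group if necessary.
 */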
abdfdde2
AW
4148static int intel_iommu_add_device(struct device *dev)
4149{
4150 struct pci_dev *pdev = to_pci_dev(dev);
3da4af0a 4151 struct pci_dev *bridge, *dma_pdev = NULL;
abdfdde2
AW
4152 struct iommu_group *group;
4153 int ret;
70ae6f0d 4154
abdfdde2
AW
4155 if (!device_to_iommu(pci_domain_nr(pdev->bus),
4156 pdev->bus->number, pdev->devfn))
70ae6f0d
AW
4157 return -ENODEV;
4158
4159 bridge = pci_find_upstream_pcie_bridge(pdev);
4160 if (bridge) {
abdfdde2
AW
4161 if (pci_is_pcie(bridge))
4162 dma_pdev = pci_get_domain_bus_and_slot(
4163 pci_domain_nr(pdev->bus),
4164 bridge->subordinate->number, 0);
3da4af0a 4165 if (!dma_pdev)
abdfdde2
AW
4166 dma_pdev = pci_dev_get(bridge);
4167 } else
4168 dma_pdev = pci_dev_get(pdev);
4169
a4ff1fc2 4170 /* Account for quirked devices */
783f157b
AW
4171 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4172
a4ff1fc2
AW
4173 /*
4174 * If it's a multifunction device that does not support our
4175 * required ACS flags, add to the same group as function 0.
4176 */
783f157b
AW
4177 if (dma_pdev->multifunction &&
4178 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
4179 swap_pci_ref(&dma_pdev,
4180 pci_get_slot(dma_pdev->bus,
4181 PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
4182 0)));
4183
a4ff1fc2
AW
4184 /*
4185 * Devices on the root bus go through the iommu. If that's not us,
4186 * find the next upstream device and test ACS up to the root bus.
4187 * Finding the next device may require skipping virtual buses.
4188 */
783f157b 4189 while (!pci_is_root_bus(dma_pdev->bus)) {
a4ff1fc2
AW
4190 struct pci_bus *bus = dma_pdev->bus;
4191
4192 while (!bus->self) {
4193 if (!pci_is_root_bus(bus))
4194 bus = bus->parent;
4195 else
4196 goto root_bus;
4197 }
4198
4199 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
783f157b
AW
4200 break;
4201
a4ff1fc2 4202 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
783f157b
AW
4203 }
4204
a4ff1fc2 4205root_bus:
abdfdde2
AW
4206 group = iommu_group_get(&dma_pdev->dev);
4207 pci_dev_put(dma_pdev);
4208 if (!group) {
4209 group = iommu_group_alloc();
4210 if (IS_ERR(group))
4211 return PTR_ERR(group);
70ae6f0d
AW
4212 }
4213
abdfdde2 4214 ret = iommu_group_add_device(group, dev);
bcb71abe 4215
abdfdde2
AW
4216 iommu_group_put(group);
4217 return ret;
4218}
70ae6f0d 4219
abdfdde2
AW
4220static void intel_iommu_remove_device(struct device *dev)
4221{
4222 iommu_group_remove_device(dev);
70ae6f0d
AW
4223}
4224
a8bcbb0d
JR
4225static struct iommu_ops intel_iommu_ops = {
4226 .domain_init = intel_iommu_domain_init,
4227 .domain_destroy = intel_iommu_domain_destroy,
4228 .attach_dev = intel_iommu_attach_device,
4229 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4230 .map = intel_iommu_map,
4231 .unmap = intel_iommu_unmap,
a8bcbb0d 4232 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4233 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4234 .add_device = intel_iommu_add_device,
4235 .remove_device = intel_iommu_remove_device,
6d1c56a9 4236 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4237};
9af88143 4238
9452618e
DV
4239static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4240{
4241 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4242 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4243 dmar_map_gfx = 0;
4244}
4245
4246DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4247DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4248DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4249DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4250DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4251DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4252DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4253
d34d6517 4254static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4255{
4256 /*
4257 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4258 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4259 */
4260 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4261 rwbf_quirk = 1;
4262}
4263
4264DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4265DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4266DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4267DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4268DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4270DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4271
eecfd57f
AJ
4272#define GGC 0x52
4273#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4274#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4275#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4276#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4277#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4278#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4279#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4280#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4281
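/*
 * On Calpella/Ironlake integrated graphics the IGD needs BIOS-allocated
 * stolen memory for its VT-d shadow GTT.  If the GGC register shows none
 * was set aside, graphics DMAR is disabled; otherwise batched IOTLB
 * flushing is turned off while the GPU is translated.
 */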
d34d6517 4282static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4283{
4284 unsigned short ggc;
4285
eecfd57f 4286 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4287 return;
4288
eecfd57f 4289 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4290 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4291 dmar_map_gfx = 0;
6fbcfb3e
DW
4292 } else if (dmar_map_gfx) {
4293 /* we have to ensure the gfx device is idle before we flush */
4294 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4295 intel_iommu_strict = 1;
4296 }
9eecabcb
DW
4297}
4298DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4299DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4300DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4301DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4302
e0fc7e0b
DW
4303/* On Tylersburg chipsets, some BIOSes have been known to enable the
4304 ISOCH DMAR unit for the Azalia sound device, but not give it any
4305 TLB entries, which causes it to deadlock. Check for that. We do
4306 this in a function called from init_dmars(), instead of in a PCI
4307 quirk, because we don't want to print the obnoxious "BIOS broken"
4308 message if VT-d is actually disabled.
4309*/
4310static void __init check_tylersburg_isoch(void)
4311{
4312 struct pci_dev *pdev;
4313 uint32_t vtisochctrl;
4314
4315 /* If there's no Azalia in the system anyway, forget it. */
4316 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4317 if (!pdev)
4318 return;
4319 pci_dev_put(pdev);
4320
4321 /* System Management Registers. Might be hidden, in which case
4322 we can't do the sanity check. But that's OK, because the
4323 known-broken BIOSes _don't_ actually hide it, so far. */
4324 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4325 if (!pdev)
4326 return;
4327
4328 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4329 pci_dev_put(pdev);
4330 return;
4331 }
4332
4333 pci_dev_put(pdev);
4334
4335 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4336 if (vtisochctrl & 1)
4337 return;
4338
4339 /* Drop all bits other than the number of TLB entries */
4340 vtisochctrl &= 0x1c;
4341
4342 /* If we have the recommended number of TLB entries (16), fine. */
4343 if (vtisochctrl == 0x10)
4344 return;
4345
4346 /* Zero TLB entries? You get to ride the short bus to school. */
4347 if (!vtisochctrl) {
4348 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4349 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4350 dmi_get_system_info(DMI_BIOS_VENDOR),
4351 dmi_get_system_info(DMI_BIOS_VERSION),
4352 dmi_get_system_info(DMI_PRODUCT_VERSION));
4353 iommu_identity_mapping |= IDENTMAP_AZALIA;
4354 return;
4355 }
4356
4357 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4358 vtisochctrl);
4359}