iommu/vt-d: Remove the useless dma_pte_addr
drivers/iommu/intel-iommu.c (deliverable/linux.git)
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
18 */
19
20#include <linux/init.h>
21#include <linux/bitmap.h>
5e0d2a6f 22#include <linux/debugfs.h>
54485c30 23#include <linux/export.h>
ba395927
KA
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
ba395927
KA
27#include <linux/spinlock.h>
28#include <linux/pci.h>
29#include <linux/dmar.h>
30#include <linux/dma-mapping.h>
31#include <linux/mempool.h>
75f05569 32#include <linux/memory.h>
5e0d2a6f 33#include <linux/timer.h>
38717946 34#include <linux/iova.h>
5d450806 35#include <linux/iommu.h>
38717946 36#include <linux/intel-iommu.h>
134fac3f 37#include <linux/syscore_ops.h>
69575d38 38#include <linux/tboot.h>
adb2fe02 39#include <linux/dmi.h>
5cdede24 40#include <linux/pci-ats.h>
0ee332c1 41#include <linux/memblock.h>
36746436 42#include <linux/dma-contiguous.h>
8a8f422d 43#include <asm/irq_remapping.h>
ba395927 44#include <asm/cacheflush.h>
46a7fa27 45#include <asm/iommu.h>
ba395927 46
078e1ee2 47#include "irq_remapping.h"
61e015ac 48#include "pci.h"
078e1ee2 49
5b6985ce
FY
50#define ROOT_SIZE VTD_PAGE_SIZE
51#define CONTEXT_SIZE VTD_PAGE_SIZE
52
ba395927
KA
53#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
54#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 55#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
56
57#define IOAPIC_RANGE_START (0xfee00000)
58#define IOAPIC_RANGE_END (0xfeefffff)
59#define IOVA_START_ADDR (0x1000)
60
61#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
62
4ed0d3e6 63#define MAX_AGAW_WIDTH 64
5c645b35 64#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 65
2ebe3151
DW
66#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
67#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
68
69/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
70 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
71#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
72 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
73#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
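/*
 * Worked example (editorial illustration; assumes VTD_PAGE_SHIFT == 12):
 * for the default 48-bit guest address width, __DOMAIN_MAX_PFN(48) is
 * 2^36 - 1. A 64-bit kernel keeps that value in DOMAIN_MAX_PFN(48); a
 * 32-bit kernel clamps it to ULONG_MAX so PFN arithmetic stays inside
 * 'unsigned long', as the comment above requires.
 */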
ba395927 74
f27be03b 75#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 76#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 77#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 78
df08cdc7
AM
79/* page table handling */
80#define LEVEL_STRIDE (9)
81#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
82
6d1c56a9
OBC
83/*
 84 * This bitmap is used to advertise the page sizes our hardware supports
85 * to the IOMMU core, which will then use this information to split
86 * physically contiguous memory regions it is mapping into page sizes
87 * that we support.
88 *
89 * Traditionally the IOMMU core just handed us the mappings directly,
90 * after making sure the size is an order of a 4KiB page and that the
91 * mapping has natural alignment.
92 *
93 * To retain this behavior, we currently advertise that we support
94 * all page sizes that are an order of 4KiB.
95 *
96 * If at some point we'd like to utilize the IOMMU core's new behavior,
97 * we could change this to advertise the real page sizes we support.
98 */
99#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
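/*
 * Illustration: ~0xFFFUL leaves every bit from bit 12 upwards set, so the
 * IOMMU core sees 4KiB, 8KiB, 16KiB, ..., 2MiB, 1GiB, ... -- every
 * power-of-two size that is a multiple of 4KiB -- as supported.
 */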
100
df08cdc7
AM
101static inline int agaw_to_level(int agaw)
102{
103 return agaw + 2;
104}
105
106static inline int agaw_to_width(int agaw)
107{
5c645b35 108 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
109}
110
111static inline int width_to_agaw(int width)
112{
5c645b35 113 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
114}
115
116static inline unsigned int level_to_offset_bits(int level)
117{
118 return (level - 1) * LEVEL_STRIDE;
119}
120
121static inline int pfn_level_offset(unsigned long pfn, int level)
122{
123 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
124}
125
126static inline unsigned long level_mask(int level)
127{
128 return -1UL << level_to_offset_bits(level);
129}
130
131static inline unsigned long level_size(int level)
132{
133 return 1UL << level_to_offset_bits(level);
134}
135
136static inline unsigned long align_to_level(unsigned long pfn, int level)
137{
138 return (pfn + level_size(level) - 1) & level_mask(level);
139}
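/*
 * Worked example for the helpers above (editorial, illustrative only):
 * with agaw == 2, agaw_to_level() gives a 4-level table and
 * agaw_to_width() gives 48 bits; width_to_agaw(48) maps back to 2.
 * At level 2, level_to_offset_bits() is 9, so pfn_level_offset(pfn, 2)
 * is (pfn >> 9) & 0x1ff and level_size(2) spans 512 4KiB pages (2MiB).
 */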
fd18de50 140
6dd9a7c7
YS
141static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
142{
5c645b35 143 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
144}
145
dd4e8319
DW
146/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
147 are never going to work. */
148static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
149{
150 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
151}
152
153static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
154{
155 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
156}
157static inline unsigned long page_to_dma_pfn(struct page *pg)
158{
159 return mm_to_dma_pfn(page_to_pfn(pg));
160}
161static inline unsigned long virt_to_dma_pfn(void *p)
162{
163 return page_to_dma_pfn(virt_to_page(p));
164}
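/*
 * Editorial note: on typical x86 configurations PAGE_SHIFT and
 * VTD_PAGE_SHIFT are both 12, so these conversions are the identity.
 * They only shift when MM pages are larger than the 4KiB VT-d page, in
 * which case one mm PFN corresponds to several consecutive DMA PFNs.
 */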
165
d9630fe9
WH
166/* global iommu list, set NULL for ignored DMAR units */
167static struct intel_iommu **g_iommus;
168
e0fc7e0b 169static void __init check_tylersburg_isoch(void);
9af88143
DW
170static int rwbf_quirk;
171
b779260b
JC
172/*
173 * set to 1 to panic kernel if can't successfully enable VT-d
174 * (used when kernel is launched w/ TXT)
175 */
176static int force_on = 0;
177
46b08e1a
MM
178/*
179 * 0: Present
180 * 1-11: Reserved
181 * 12-63: Context Ptr (12 - (haw-1))
182 * 64-127: Reserved
183 */
184struct root_entry {
185 u64 val;
186 u64 rsvd1;
187};
188#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
189static inline bool root_present(struct root_entry *root)
190{
191 return (root->val & 1);
192}
193static inline void set_root_present(struct root_entry *root)
194{
195 root->val |= 1;
196}
197static inline void set_root_value(struct root_entry *root, unsigned long value)
198{
199 root->val |= value & VTD_PAGE_MASK;
200}
201
202static inline struct context_entry *
203get_context_addr_from_root(struct root_entry *root)
204{
205 return (struct context_entry *)
206 (root_present(root)?phys_to_virt(
207 root->val & VTD_PAGE_MASK) :
208 NULL);
209}
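/*
 * Lookup path sketch (editorial, for orientation): the root table holds
 * one 16-byte root_entry per bus (ROOT_ENTRY_NR == 256); a present root
 * entry points to a 4KiB context table with one context_entry per devfn,
 * so a (bus, devfn) pair resolves as root_entry[bus] -> context[devfn].
 */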
210
7a8fc25e
MM
211/*
212 * low 64 bits:
213 * 0: present
214 * 1: fault processing disable
215 * 2-3: translation type
216 * 12-63: address space root
217 * high 64 bits:
218 * 0-2: address width
219 * 3-6: aval
220 * 8-23: domain id
221 */
222struct context_entry {
223 u64 lo;
224 u64 hi;
225};
c07e7d21
MM
226
227static inline bool context_present(struct context_entry *context)
228{
229 return (context->lo & 1);
230}
231static inline void context_set_present(struct context_entry *context)
232{
233 context->lo |= 1;
234}
235
236static inline void context_set_fault_enable(struct context_entry *context)
237{
238 context->lo &= (((u64)-1) << 2) | 1;
239}
240
c07e7d21
MM
241static inline void context_set_translation_type(struct context_entry *context,
242 unsigned long value)
243{
244 context->lo &= (((u64)-1) << 4) | 3;
245 context->lo |= (value & 3) << 2;
246}
247
248static inline void context_set_address_root(struct context_entry *context,
249 unsigned long value)
250{
251 context->lo |= value & VTD_PAGE_MASK;
252}
253
254static inline void context_set_address_width(struct context_entry *context,
255 unsigned long value)
256{
257 context->hi |= value & 7;
258}
259
260static inline void context_set_domain_id(struct context_entry *context,
261 unsigned long value)
262{
263 context->hi |= (value & ((1 << 16) - 1)) << 8;
264}
265
266static inline void context_clear_entry(struct context_entry *context)
267{
268 context->lo = 0;
269 context->hi = 0;
270}
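/*
 * Usage sketch (editorial; mirrors domain_context_mapping_one() below):
 * a context entry is filled by setting the domain id, the address root
 * (virt_to_phys of the domain's pgd), the address width, the translation
 * type and fault-processing bits, and only then marking it present.
 */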
7a8fc25e 271
622ba12a
MM
272/*
273 * 0: readable
274 * 1: writable
275 * 2-6: reserved
276 * 7: super page
9cf06697
SY
277 * 8-10: available
278 * 11: snoop behavior
622ba12a
MM
 279 * 12-63: Host physical address
280 */
281struct dma_pte {
282 u64 val;
283};
622ba12a 284
19c239ce
MM
285static inline void dma_clear_pte(struct dma_pte *pte)
286{
287 pte->val = 0;
288}
289
19c239ce
MM
290static inline u64 dma_pte_addr(struct dma_pte *pte)
291{
c85994e4
DW
292#ifdef CONFIG_64BIT
293 return pte->val & VTD_PAGE_MASK;
294#else
295 /* Must have a full atomic 64-bit read */
1a8bd481 296 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 297#endif
19c239ce
MM
298}
299
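/*
 * Why __cmpxchg64() above (editorial): on 32-bit kernels a plain 64-bit
 * load may be split into two 32-bit reads and observe a torn PTE.
 * cmpxchg64 with old == new == 0 cannot change the entry's value, but it
 * does return the current value with a single atomic 64-bit access.
 */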
19c239ce
MM
300static inline bool dma_pte_present(struct dma_pte *pte)
301{
302 return (pte->val & 3) != 0;
303}
622ba12a 304
4399c8bf
AK
305static inline bool dma_pte_superpage(struct dma_pte *pte)
306{
c3c75eb7 307 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
308}
309
75e6bf96
DW
310static inline int first_pte_in_page(struct dma_pte *pte)
311{
312 return !((unsigned long)pte & ~VTD_PAGE_MASK);
313}
314
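/*
 * Editorial note: a page-table page holds 512 eight-byte PTEs, so a pte
 * pointer whose low 12 bits are clear is the first entry of its table
 * page -- the property the walkers below use to stop at page boundaries.
 */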
2c2e2c38
FY
315/*
 316 * This domain is a static identity mapping domain.
 317 * 1. This domain creates a static 1:1 mapping to all usable memory.
 318 * 2. It maps to each iommu if successful.
 319 * 3. Each iommu maps to this domain if successful.
320 */
19943b0e
DW
321static struct dmar_domain *si_domain;
322static int hw_pass_through = 1;
2c2e2c38 323
3b5410e7 324/* devices under the same p2p bridge are owned in one domain */
cdc7b837 325#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 326
1ce28feb
WH
 327/* domain represents a virtual machine; more than one device
 328 * across iommus may be owned in one domain, e.g. a kvm guest.
329 */
330#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
331
2c2e2c38
FY
 332/* si_domain contains multiple devices */
333#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
334
1b198bb0
MT
335/* define the limit of IOMMUs supported in each domain */
336#ifdef CONFIG_X86
337# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
338#else
339# define IOMMU_UNITS_SUPPORTED 64
340#endif
341
99126f7c
MM
342struct dmar_domain {
343 int id; /* domain id */
4c923d47 344 int nid; /* node id */
1b198bb0
MT
345 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
346 /* bitmap of iommus this domain uses*/
99126f7c
MM
347
348 struct list_head devices; /* all devices' list */
349 struct iova_domain iovad; /* iova's that belong to this domain */
350
351 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
352 int gaw; /* max guest address width */
353
354 /* adjusted guest address width, 0 is level 2 30-bit */
355 int agaw;
356
3b5410e7 357 int flags; /* flags to find out type of domain */
8e604097
WH
358
359 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 360 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 361 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
362 int iommu_superpage;/* Level of superpages supported:
363 0 == 4KiB (no superpages), 1 == 2MiB,
364 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 365 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 366 u64 max_addr; /* maximum mapped address */
99126f7c
MM
367};
368
a647dacb
MM
369/* PCI domain-device relationship */
370struct device_domain_info {
371 struct list_head link; /* link to domain siblings */
372 struct list_head global; /* link to global list */
276dbf99 373 u8 bus; /* PCI bus number */
a647dacb 374 u8 devfn; /* PCI devfn number */
0bcb3e28 375 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 376 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
377 struct dmar_domain *domain; /* pointer to domain */
378};
379
b94e4117
JL
380struct dmar_rmrr_unit {
381 struct list_head list; /* list of rmrr units */
382 struct acpi_dmar_header *hdr; /* ACPI header */
383 u64 base_address; /* reserved base address*/
384 u64 end_address; /* reserved end address */
832bd858 385 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
386 int devices_cnt; /* target device count */
387};
388
389struct dmar_atsr_unit {
390 struct list_head list; /* list of ATSR units */
391 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 392 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
393 int devices_cnt; /* target device count */
394 u8 include_all:1; /* include all ports */
395};
396
397static LIST_HEAD(dmar_atsr_units);
398static LIST_HEAD(dmar_rmrr_units);
399
400#define for_each_rmrr_units(rmrr) \
401 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
402
5e0d2a6f 403static void flush_unmaps_timeout(unsigned long data);
404
b707cb02 405static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 406
80b20dd8 407#define HIGH_WATER_MARK 250
408struct deferred_flush_tables {
409 int next;
410 struct iova *iova[HIGH_WATER_MARK];
411 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 412 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 413};
414
415static struct deferred_flush_tables *deferred_flush;
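/*
 * Editorial note: these tables implement the lazy-unmap path -- freed
 * IOVAs are queued per IOMMU and the IOTLB is flushed in batches, either
 * when flush_unmaps_timeout() fires or once enough entries accumulate
 * (HIGH_WATER_MARK), unless intel_iommu=strict forces an immediate flush
 * on every unmap.
 */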
416
5e0d2a6f 417/* number of intel_iommus, used to size g_iommus and related arrays */
5e0d2a6f 418static int g_num_of_iommus;
419
420static DEFINE_SPINLOCK(async_umap_flush_lock);
421static LIST_HEAD(unmaps_to_do);
422
423static int timer_on;
424static long list_size;
5e0d2a6f 425
92d03cc8 426static void domain_exit(struct dmar_domain *domain);
ba395927 427static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 428static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 429 struct device *dev);
92d03cc8 430static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 431 struct device *dev);
ba395927 432
d3f13810 433#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
434int dmar_disabled = 0;
435#else
436int dmar_disabled = 1;
d3f13810 437#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 438
8bc1f85c
ED
439int intel_iommu_enabled = 0;
440EXPORT_SYMBOL_GPL(intel_iommu_enabled);
441
2d9e667e 442static int dmar_map_gfx = 1;
7d3b03ce 443static int dmar_forcedac;
5e0d2a6f 444static int intel_iommu_strict;
6dd9a7c7 445static int intel_iommu_superpage = 1;
ba395927 446
c0771df8
DW
447int intel_iommu_gfx_mapped;
448EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
449
ba395927
KA
450#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
451static DEFINE_SPINLOCK(device_domain_lock);
452static LIST_HEAD(device_domain_list);
453
a8bcbb0d
JR
454static struct iommu_ops intel_iommu_ops;
455
ba395927
KA
456static int __init intel_iommu_setup(char *str)
457{
458 if (!str)
459 return -EINVAL;
460 while (*str) {
0cd5c3c8
KM
461 if (!strncmp(str, "on", 2)) {
462 dmar_disabled = 0;
463 printk(KERN_INFO "Intel-IOMMU: enabled\n");
464 } else if (!strncmp(str, "off", 3)) {
ba395927 465 dmar_disabled = 1;
0cd5c3c8 466 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
467 } else if (!strncmp(str, "igfx_off", 8)) {
468 dmar_map_gfx = 0;
469 printk(KERN_INFO
470 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 471 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 472 printk(KERN_INFO
7d3b03ce
KA
473 "Intel-IOMMU: Forcing DAC for PCI devices\n");
474 dmar_forcedac = 1;
5e0d2a6f 475 } else if (!strncmp(str, "strict", 6)) {
476 printk(KERN_INFO
477 "Intel-IOMMU: disable batched IOTLB flush\n");
478 intel_iommu_strict = 1;
6dd9a7c7
YS
479 } else if (!strncmp(str, "sp_off", 6)) {
480 printk(KERN_INFO
481 "Intel-IOMMU: disable supported super page\n");
482 intel_iommu_superpage = 0;
ba395927
KA
483 }
484
485 str += strcspn(str, ",");
486 while (*str == ',')
487 str++;
488 }
489 return 0;
490}
491__setup("intel_iommu=", intel_iommu_setup);
492
493static struct kmem_cache *iommu_domain_cache;
494static struct kmem_cache *iommu_devinfo_cache;
495static struct kmem_cache *iommu_iova_cache;
496
4c923d47 497static inline void *alloc_pgtable_page(int node)
eb3fa7cb 498{
4c923d47
SS
499 struct page *page;
500 void *vaddr = NULL;
eb3fa7cb 501
4c923d47
SS
502 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
503 if (page)
504 vaddr = page_address(page);
eb3fa7cb 505 return vaddr;
ba395927
KA
506}
507
508static inline void free_pgtable_page(void *vaddr)
509{
510 free_page((unsigned long)vaddr);
511}
512
513static inline void *alloc_domain_mem(void)
514{
354bb65e 515 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
516}
517
38717946 518static void free_domain_mem(void *vaddr)
ba395927
KA
519{
520 kmem_cache_free(iommu_domain_cache, vaddr);
521}
522
523static inline void * alloc_devinfo_mem(void)
524{
354bb65e 525 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
526}
527
528static inline void free_devinfo_mem(void *vaddr)
529{
530 kmem_cache_free(iommu_devinfo_cache, vaddr);
531}
532
533struct iova *alloc_iova_mem(void)
534{
354bb65e 535 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
536}
537
538void free_iova_mem(struct iova *iova)
539{
540 kmem_cache_free(iommu_iova_cache, iova);
541}
542
1b573683 543
4ed0d3e6 544static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
545{
546 unsigned long sagaw;
547 int agaw = -1;
548
549 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 550 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
551 agaw >= 0; agaw--) {
552 if (test_bit(agaw, &sagaw))
553 break;
554 }
555
556 return agaw;
557}
558
4ed0d3e6
FY
559/*
560 * Calculate max SAGAW for each iommu.
561 */
562int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
563{
564 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
565}
566
567/*
568 * calculate agaw for each iommu.
569 * "SAGAW" may be different across iommus, use a default agaw, and
570 * get a supported less agaw for iommus that don't support the default agaw.
571 */
572int iommu_calculate_agaw(struct intel_iommu *iommu)
573{
574 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
575}
576
2c2e2c38 577/* This function only returns a single iommu in a domain */
8c11e798
WH
578static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
579{
580 int iommu_id;
581
2c2e2c38 582 /* si_domain and vm domain should not get here. */
1ce28feb 583 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 584 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 585
1b198bb0 586 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
587 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
588 return NULL;
589
590 return g_iommus[iommu_id];
591}
592
8e604097
WH
593static void domain_update_iommu_coherency(struct dmar_domain *domain)
594{
d0501960
DW
595 struct dmar_drhd_unit *drhd;
596 struct intel_iommu *iommu;
597 int i, found = 0;
2e12bc29 598
d0501960 599 domain->iommu_coherency = 1;
8e604097 600
1b198bb0 601 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 602 found = 1;
8e604097
WH
603 if (!ecap_coherent(g_iommus[i]->ecap)) {
604 domain->iommu_coherency = 0;
605 break;
606 }
8e604097 607 }
d0501960
DW
608 if (found)
609 return;
610
611 /* No hardware attached; use lowest common denominator */
612 rcu_read_lock();
613 for_each_active_iommu(iommu, drhd) {
614 if (!ecap_coherent(iommu->ecap)) {
615 domain->iommu_coherency = 0;
616 break;
617 }
618 }
619 rcu_read_unlock();
8e604097
WH
620}
621
58c610bd
SY
622static void domain_update_iommu_snooping(struct dmar_domain *domain)
623{
624 int i;
625
626 domain->iommu_snooping = 1;
627
1b198bb0 628 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
629 if (!ecap_sc_support(g_iommus[i]->ecap)) {
630 domain->iommu_snooping = 0;
631 break;
632 }
58c610bd
SY
633 }
634}
635
6dd9a7c7
YS
636static void domain_update_iommu_superpage(struct dmar_domain *domain)
637{
8140a95d
AK
638 struct dmar_drhd_unit *drhd;
639 struct intel_iommu *iommu = NULL;
640 int mask = 0xf;
6dd9a7c7
YS
641
642 if (!intel_iommu_superpage) {
643 domain->iommu_superpage = 0;
644 return;
645 }
646
8140a95d 647 /* set iommu_superpage to the smallest common denominator */
0e242612 648 rcu_read_lock();
8140a95d
AK
649 for_each_active_iommu(iommu, drhd) {
650 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
651 if (!mask) {
652 break;
653 }
654 }
0e242612
JL
655 rcu_read_unlock();
656
6dd9a7c7
YS
657 domain->iommu_superpage = fls(mask);
658}
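/*
 * Example (editorial; assumes the usual SLLPS encoding, bit 0 = 2MiB,
 * bit 1 = 1GiB): if every active IOMMU reports 0x1, the intersection
 * gives iommu_superpage == fls(0x1) == 1 (2MiB only); 0x3 gives 2
 * (up to 1GiB); an empty intersection leaves superpages disabled.
 */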
659
58c610bd
SY
660/* Some capabilities may be different across iommus */
661static void domain_update_iommu_cap(struct dmar_domain *domain)
662{
663 domain_update_iommu_coherency(domain);
664 domain_update_iommu_snooping(domain);
6dd9a7c7 665 domain_update_iommu_superpage(domain);
58c610bd
SY
666}
667
156baca8 668static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
669{
670 struct dmar_drhd_unit *drhd = NULL;
b683b230 671 struct intel_iommu *iommu;
156baca8
DW
672 struct device *tmp;
673 struct pci_dev *ptmp, *pdev = NULL;
674 u16 segment;
c7151a8d
WH
675 int i;
676
156baca8
DW
677 if (dev_is_pci(dev)) {
678 pdev = to_pci_dev(dev);
679 segment = pci_domain_nr(pdev->bus);
680 } else if (ACPI_COMPANION(dev))
681 dev = &ACPI_COMPANION(dev)->dev;
682
0e242612 683 rcu_read_lock();
b683b230 684 for_each_active_iommu(iommu, drhd) {
156baca8 685 if (pdev && segment != drhd->segment)
276dbf99 686 continue;
c7151a8d 687
b683b230 688 for_each_active_dev_scope(drhd->devices,
156baca8
DW
689 drhd->devices_cnt, i, tmp) {
690 if (tmp == dev) {
691 *bus = drhd->devices[i].bus;
692 *devfn = drhd->devices[i].devfn;
b683b230 693 goto out;
156baca8
DW
694 }
695
696 if (!pdev || !dev_is_pci(tmp))
697 continue;
698
699 ptmp = to_pci_dev(tmp);
700 if (ptmp->subordinate &&
701 ptmp->subordinate->number <= pdev->bus->number &&
702 ptmp->subordinate->busn_res.end >= pdev->bus->number)
703 goto got_pdev;
924b6231 704 }
c7151a8d 705
156baca8
DW
706 if (pdev && drhd->include_all) {
707 got_pdev:
708 *bus = pdev->bus->number;
709 *devfn = pdev->devfn;
b683b230 710 goto out;
156baca8 711 }
c7151a8d 712 }
b683b230 713 iommu = NULL;
156baca8 714 out:
0e242612 715 rcu_read_unlock();
c7151a8d 716
b683b230 717 return iommu;
c7151a8d
WH
718}
719
5331fe6f
WH
720static void domain_flush_cache(struct dmar_domain *domain,
721 void *addr, int size)
722{
723 if (!domain->iommu_coherency)
724 clflush_cache_range(addr, size);
725}
726
ba395927
KA
727/* Gets context entry for a given bus and devfn */
728static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
729 u8 bus, u8 devfn)
730{
731 struct root_entry *root;
732 struct context_entry *context;
733 unsigned long phy_addr;
734 unsigned long flags;
735
736 spin_lock_irqsave(&iommu->lock, flags);
737 root = &iommu->root_entry[bus];
738 context = get_context_addr_from_root(root);
739 if (!context) {
4c923d47
SS
740 context = (struct context_entry *)
741 alloc_pgtable_page(iommu->node);
ba395927
KA
742 if (!context) {
743 spin_unlock_irqrestore(&iommu->lock, flags);
744 return NULL;
745 }
5b6985ce 746 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
747 phy_addr = virt_to_phys((void *)context);
748 set_root_value(root, phy_addr);
749 set_root_present(root);
750 __iommu_flush_cache(iommu, root, sizeof(*root));
751 }
752 spin_unlock_irqrestore(&iommu->lock, flags);
753 return &context[devfn];
754}
755
756static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
757{
758 struct root_entry *root;
759 struct context_entry *context;
760 int ret;
761 unsigned long flags;
762
763 spin_lock_irqsave(&iommu->lock, flags);
764 root = &iommu->root_entry[bus];
765 context = get_context_addr_from_root(root);
766 if (!context) {
767 ret = 0;
768 goto out;
769 }
c07e7d21 770 ret = context_present(&context[devfn]);
ba395927
KA
771out:
772 spin_unlock_irqrestore(&iommu->lock, flags);
773 return ret;
774}
775
776static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
777{
778 struct root_entry *root;
779 struct context_entry *context;
780 unsigned long flags;
781
782 spin_lock_irqsave(&iommu->lock, flags);
783 root = &iommu->root_entry[bus];
784 context = get_context_addr_from_root(root);
785 if (context) {
c07e7d21 786 context_clear_entry(&context[devfn]);
ba395927
KA
787 __iommu_flush_cache(iommu, &context[devfn], \
788 sizeof(*context));
789 }
790 spin_unlock_irqrestore(&iommu->lock, flags);
791}
792
793static void free_context_table(struct intel_iommu *iommu)
794{
795 struct root_entry *root;
796 int i;
797 unsigned long flags;
798 struct context_entry *context;
799
800 spin_lock_irqsave(&iommu->lock, flags);
801 if (!iommu->root_entry) {
802 goto out;
803 }
804 for (i = 0; i < ROOT_ENTRY_NR; i++) {
805 root = &iommu->root_entry[i];
806 context = get_context_addr_from_root(root);
807 if (context)
808 free_pgtable_page(context);
809 }
810 free_pgtable_page(iommu->root_entry);
811 iommu->root_entry = NULL;
812out:
813 spin_unlock_irqrestore(&iommu->lock, flags);
814}
815
b026fd28 816static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 817 unsigned long pfn, int *target_level)
ba395927 818{
b026fd28 819 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
820 struct dma_pte *parent, *pte = NULL;
821 int level = agaw_to_level(domain->agaw);
4399c8bf 822 int offset;
ba395927
KA
823
824 BUG_ON(!domain->pgd);
f9423606
JS
825
826 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
827 /* Address beyond IOMMU's addressing capabilities. */
828 return NULL;
829
ba395927
KA
830 parent = domain->pgd;
831
5cf0a76f 832 while (1) {
ba395927
KA
833 void *tmp_page;
834
b026fd28 835 offset = pfn_level_offset(pfn, level);
ba395927 836 pte = &parent[offset];
5cf0a76f 837 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 838 break;
5cf0a76f 839 if (level == *target_level)
ba395927
KA
840 break;
841
19c239ce 842 if (!dma_pte_present(pte)) {
c85994e4
DW
843 uint64_t pteval;
844
4c923d47 845 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 846
206a73c1 847 if (!tmp_page)
ba395927 848 return NULL;
206a73c1 849
c85994e4 850 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 851 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 852 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
853 /* Someone else set it while we were thinking; use theirs. */
854 free_pgtable_page(tmp_page);
effad4b5 855 else
c85994e4 856 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 857 }
5cf0a76f
DW
858 if (level == 1)
859 break;
860
19c239ce 861 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
862 level--;
863 }
864
5cf0a76f
DW
865 if (!*target_level)
866 *target_level = level;
867
ba395927
KA
868 return pte;
869}
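/*
 * Summary (editorial): pfn_to_dma_pte() walks from domain->pgd down to
 * *target_level, allocating missing table pages along the way; losing the
 * cmpxchg64() race just means another CPU installed the page first, so
 * ours is freed and theirs is used. Passing *target_level == 0 asks for
 * whatever leaf already exists (possibly a superpage) and reports the
 * level actually reached back through *target_level.
 */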
870
6dd9a7c7 871
ba395927 872/* return address's pte at specific level */
90dcfb5e
DW
873static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
874 unsigned long pfn,
6dd9a7c7 875 int level, int *large_page)
ba395927
KA
876{
877 struct dma_pte *parent, *pte = NULL;
878 int total = agaw_to_level(domain->agaw);
879 int offset;
880
881 parent = domain->pgd;
882 while (level <= total) {
90dcfb5e 883 offset = pfn_level_offset(pfn, total);
ba395927
KA
884 pte = &parent[offset];
885 if (level == total)
886 return pte;
887
6dd9a7c7
YS
888 if (!dma_pte_present(pte)) {
889 *large_page = total;
ba395927 890 break;
6dd9a7c7
YS
891 }
892
e16922af 893 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
894 *large_page = total;
895 return pte;
896 }
897
19c239ce 898 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
899 total--;
900 }
901 return NULL;
902}
903
ba395927 904/* clear last level pte, a tlb flush should be followed */
5cf0a76f 905static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
906 unsigned long start_pfn,
907 unsigned long last_pfn)
ba395927 908{
04b18e65 909 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 910 unsigned int large_page = 1;
310a5ab9 911 struct dma_pte *first_pte, *pte;
66eae846 912
04b18e65 913 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 914 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 915 BUG_ON(start_pfn > last_pfn);
ba395927 916
04b18e65 917 /* we don't need lock here; nobody else touches the iova range */
59c36286 918 do {
6dd9a7c7
YS
919 large_page = 1;
920 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 921 if (!pte) {
6dd9a7c7 922 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
923 continue;
924 }
6dd9a7c7 925 do {
310a5ab9 926 dma_clear_pte(pte);
6dd9a7c7 927 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 928 pte++;
75e6bf96
DW
929 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
930
310a5ab9
DW
931 domain_flush_cache(domain, first_pte,
932 (void *)pte - (void *)first_pte);
59c36286
DW
933
934 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
935}
936
3269ee0b
AW
937static void dma_pte_free_level(struct dmar_domain *domain, int level,
938 struct dma_pte *pte, unsigned long pfn,
939 unsigned long start_pfn, unsigned long last_pfn)
940{
941 pfn = max(start_pfn, pfn);
942 pte = &pte[pfn_level_offset(pfn, level)];
943
944 do {
945 unsigned long level_pfn;
946 struct dma_pte *level_pte;
947
948 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
949 goto next;
950
951 level_pfn = pfn & level_mask(level - 1);
952 level_pte = phys_to_virt(dma_pte_addr(pte));
953
954 if (level > 2)
955 dma_pte_free_level(domain, level - 1, level_pte,
956 level_pfn, start_pfn, last_pfn);
957
958 /* If range covers entire pagetable, free it */
959 if (!(start_pfn > level_pfn ||
08336fd2 960 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
961 dma_clear_pte(pte);
962 domain_flush_cache(domain, pte, sizeof(*pte));
963 free_pgtable_page(level_pte);
964 }
965next:
966 pfn += level_size(level);
967 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
968}
969
ba395927
KA
970/* free page table pages. last level pte should already be cleared */
971static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
972 unsigned long start_pfn,
973 unsigned long last_pfn)
ba395927 974{
6660c63a 975 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 976
6660c63a
DW
977 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
978 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 979 BUG_ON(start_pfn > last_pfn);
ba395927 980
f3a0a52f 981 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
982 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
983 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 984
ba395927 985 /* free pgd */
d794dc9b 986 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
987 free_pgtable_page(domain->pgd);
988 domain->pgd = NULL;
989 }
990}
991
ea8ea460
DW
992/* When a page at a given level is being unlinked from its parent, we don't
993 need to *modify* it at all. All we need to do is make a list of all the
994 pages which can be freed just as soon as we've flushed the IOTLB and we
995 know the hardware page-walk will no longer touch them.
996 The 'pte' argument is the *parent* PTE, pointing to the page that is to
997 be freed. */
998static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
999 int level, struct dma_pte *pte,
1000 struct page *freelist)
1001{
1002 struct page *pg;
1003
1004 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1005 pg->freelist = freelist;
1006 freelist = pg;
1007
1008 if (level == 1)
1009 return freelist;
1010
adeb2590
JL
1011 pte = page_address(pg);
1012 do {
ea8ea460
DW
1013 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1014 freelist = dma_pte_list_pagetables(domain, level - 1,
1015 pte, freelist);
adeb2590
JL
1016 pte++;
1017 } while (!first_pte_in_page(pte));
ea8ea460
DW
1018
1019 return freelist;
1020}
1021
1022static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1023 struct dma_pte *pte, unsigned long pfn,
1024 unsigned long start_pfn,
1025 unsigned long last_pfn,
1026 struct page *freelist)
1027{
1028 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1029
1030 pfn = max(start_pfn, pfn);
1031 pte = &pte[pfn_level_offset(pfn, level)];
1032
1033 do {
1034 unsigned long level_pfn;
1035
1036 if (!dma_pte_present(pte))
1037 goto next;
1038
1039 level_pfn = pfn & level_mask(level);
1040
1041 /* If range covers entire pagetable, free it */
1042 if (start_pfn <= level_pfn &&
1043 last_pfn >= level_pfn + level_size(level) - 1) {
 1044 /* These subordinate page tables are going away entirely. Don't
1045 bother to clear them; we're just going to *free* them. */
1046 if (level > 1 && !dma_pte_superpage(pte))
1047 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1048
1049 dma_clear_pte(pte);
1050 if (!first_pte)
1051 first_pte = pte;
1052 last_pte = pte;
1053 } else if (level > 1) {
1054 /* Recurse down into a level that isn't *entirely* obsolete */
1055 freelist = dma_pte_clear_level(domain, level - 1,
1056 phys_to_virt(dma_pte_addr(pte)),
1057 level_pfn, start_pfn, last_pfn,
1058 freelist);
1059 }
1060next:
1061 pfn += level_size(level);
1062 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1063
1064 if (first_pte)
1065 domain_flush_cache(domain, first_pte,
1066 (void *)++last_pte - (void *)first_pte);
1067
1068 return freelist;
1069}
1070
1071/* We can't just free the pages because the IOMMU may still be walking
1072 the page tables, and may have cached the intermediate levels. The
1073 pages can only be freed after the IOTLB flush has been done. */
1074struct page *domain_unmap(struct dmar_domain *domain,
1075 unsigned long start_pfn,
1076 unsigned long last_pfn)
1077{
1078 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1079 struct page *freelist = NULL;
1080
1081 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1082 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1083 BUG_ON(start_pfn > last_pfn);
1084
1085 /* we don't need lock here; nobody else touches the iova range */
1086 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1087 domain->pgd, 0, start_pfn, last_pfn, NULL);
1088
1089 /* free pgd */
1090 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1091 struct page *pgd_page = virt_to_page(domain->pgd);
1092 pgd_page->freelist = freelist;
1093 freelist = pgd_page;
1094
1095 domain->pgd = NULL;
1096 }
1097
1098 return freelist;
1099}
1100
1101void dma_free_pagelist(struct page *freelist)
1102{
1103 struct page *pg;
1104
1105 while ((pg = freelist)) {
1106 freelist = pg->freelist;
1107 free_pgtable_page(page_address(pg));
1108 }
1109}
1110
ba395927
KA
1111/* iommu handling */
1112static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1113{
1114 struct root_entry *root;
1115 unsigned long flags;
1116
4c923d47 1117 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
1118 if (!root)
1119 return -ENOMEM;
1120
5b6985ce 1121 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1122
1123 spin_lock_irqsave(&iommu->lock, flags);
1124 iommu->root_entry = root;
1125 spin_unlock_irqrestore(&iommu->lock, flags);
1126
1127 return 0;
1128}
1129
ba395927
KA
1130static void iommu_set_root_entry(struct intel_iommu *iommu)
1131{
1132 void *addr;
c416daa9 1133 u32 sts;
ba395927
KA
1134 unsigned long flag;
1135
1136 addr = iommu->root_entry;
1137
1f5b3c3f 1138 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1139 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1140
c416daa9 1141 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1142
1143 /* Make sure hardware complete it */
1144 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1145 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1146
1f5b3c3f 1147 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1148}
1149
1150static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1151{
1152 u32 val;
1153 unsigned long flag;
1154
9af88143 1155 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1156 return;
ba395927 1157
1f5b3c3f 1158 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1159 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1160
1161 /* Make sure hardware complete it */
1162 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1163 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1164
1f5b3c3f 1165 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1166}
1167
1168/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1169static void __iommu_flush_context(struct intel_iommu *iommu,
1170 u16 did, u16 source_id, u8 function_mask,
1171 u64 type)
ba395927
KA
1172{
1173 u64 val = 0;
1174 unsigned long flag;
1175
ba395927
KA
1176 switch (type) {
1177 case DMA_CCMD_GLOBAL_INVL:
1178 val = DMA_CCMD_GLOBAL_INVL;
1179 break;
1180 case DMA_CCMD_DOMAIN_INVL:
1181 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1182 break;
1183 case DMA_CCMD_DEVICE_INVL:
1184 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1185 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1186 break;
1187 default:
1188 BUG();
1189 }
1190 val |= DMA_CCMD_ICC;
1191
1f5b3c3f 1192 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1193 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1194
1195 /* Make sure hardware complete it */
1196 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1197 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1198
1f5b3c3f 1199 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1200}
1201
ba395927 1202/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1203static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1204 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1205{
1206 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1207 u64 val = 0, val_iva = 0;
1208 unsigned long flag;
1209
ba395927
KA
1210 switch (type) {
1211 case DMA_TLB_GLOBAL_FLUSH:
1212 /* global flush doesn't need set IVA_REG */
1213 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1214 break;
1215 case DMA_TLB_DSI_FLUSH:
1216 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1217 break;
1218 case DMA_TLB_PSI_FLUSH:
1219 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1220 /* IH bit is passed in as part of address */
ba395927
KA
1221 val_iva = size_order | addr;
1222 break;
1223 default:
1224 BUG();
1225 }
1226 /* Note: set drain read/write */
1227#if 0
1228 /*
 1229 * This is probably meant to be extra safe. Looks like we can
1230 * ignore it without any impact.
1231 */
1232 if (cap_read_drain(iommu->cap))
1233 val |= DMA_TLB_READ_DRAIN;
1234#endif
1235 if (cap_write_drain(iommu->cap))
1236 val |= DMA_TLB_WRITE_DRAIN;
1237
1f5b3c3f 1238 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1239 /* Note: Only uses first TLB reg currently */
1240 if (val_iva)
1241 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1242 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1243
1244 /* Make sure hardware complete it */
1245 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1246 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1247
1f5b3c3f 1248 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1249
1250 /* check IOTLB invalidation granularity */
1251 if (DMA_TLB_IAIG(val) == 0)
1252 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1253 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1254 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1255 (unsigned long long)DMA_TLB_IIRG(type),
1256 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1257}
1258
64ae892b
DW
1259static struct device_domain_info *
1260iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1261 u8 bus, u8 devfn)
93a23a72
YZ
1262{
1263 int found = 0;
1264 unsigned long flags;
1265 struct device_domain_info *info;
0bcb3e28 1266 struct pci_dev *pdev;
93a23a72
YZ
1267
1268 if (!ecap_dev_iotlb_support(iommu->ecap))
1269 return NULL;
1270
1271 if (!iommu->qi)
1272 return NULL;
1273
1274 spin_lock_irqsave(&device_domain_lock, flags);
1275 list_for_each_entry(info, &domain->devices, link)
1276 if (info->bus == bus && info->devfn == devfn) {
1277 found = 1;
1278 break;
1279 }
1280 spin_unlock_irqrestore(&device_domain_lock, flags);
1281
0bcb3e28 1282 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1283 return NULL;
1284
0bcb3e28
DW
1285 pdev = to_pci_dev(info->dev);
1286
1287 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1288 return NULL;
1289
0bcb3e28 1290 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1291 return NULL;
1292
93a23a72
YZ
1293 return info;
1294}
1295
1296static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1297{
0bcb3e28 1298 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1299 return;
1300
0bcb3e28 1301 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1302}
1303
1304static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1305{
0bcb3e28
DW
1306 if (!info->dev || !dev_is_pci(info->dev) ||
1307 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1308 return;
1309
0bcb3e28 1310 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1311}
1312
1313static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1314 u64 addr, unsigned mask)
1315{
1316 u16 sid, qdep;
1317 unsigned long flags;
1318 struct device_domain_info *info;
1319
1320 spin_lock_irqsave(&device_domain_lock, flags);
1321 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1322 struct pci_dev *pdev;
1323 if (!info->dev || !dev_is_pci(info->dev))
1324 continue;
1325
1326 pdev = to_pci_dev(info->dev);
1327 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1328 continue;
1329
1330 sid = info->bus << 8 | info->devfn;
0bcb3e28 1331 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1332 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1333 }
1334 spin_unlock_irqrestore(&device_domain_lock, flags);
1335}
1336
1f0ef2aa 1337static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1338 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1339{
9dd2fe89 1340 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1341 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1342
ba395927
KA
1343 BUG_ON(pages == 0);
1344
ea8ea460
DW
1345 if (ih)
1346 ih = 1 << 6;
ba395927 1347 /*
9dd2fe89
YZ
1348 * Fallback to domain selective flush if no PSI support or the size is
1349 * too big.
ba395927
KA
1350 * PSI requires page size to be 2 ^ x, and the base address is naturally
1351 * aligned to the size
1352 */
9dd2fe89
YZ
1353 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1354 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1355 DMA_TLB_DSI_FLUSH);
9dd2fe89 1356 else
ea8ea460 1357 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1358 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1359
1360 /*
82653633
NA
1361 * In caching mode, changes of pages from non-present to present require
1362 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1363 */
82653633 1364 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1365 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1366}
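/*
 * Example for the mask computation above (editorial): flushing pages == 3
 * rounds up to 4, so mask == ilog2(4) == 2 and the PSI invalidates a
 * naturally aligned 2^2-page (16KiB) region containing the request; a
 * single page gives mask == 0.
 */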
1367
f8bab735 1368static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1369{
1370 u32 pmen;
1371 unsigned long flags;
1372
1f5b3c3f 1373 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1374 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1375 pmen &= ~DMA_PMEN_EPM;
1376 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1377
1378 /* wait for the protected region status bit to clear */
1379 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1380 readl, !(pmen & DMA_PMEN_PRS), pmen);
1381
1f5b3c3f 1382 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1383}
1384
ba395927
KA
1385static int iommu_enable_translation(struct intel_iommu *iommu)
1386{
1387 u32 sts;
1388 unsigned long flags;
1389
1f5b3c3f 1390 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1391 iommu->gcmd |= DMA_GCMD_TE;
1392 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1393
1394 /* Make sure hardware complete it */
1395 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1396 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1397
1f5b3c3f 1398 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1399 return 0;
1400}
1401
1402static int iommu_disable_translation(struct intel_iommu *iommu)
1403{
1404 u32 sts;
1405 unsigned long flag;
1406
1f5b3c3f 1407 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1408 iommu->gcmd &= ~DMA_GCMD_TE;
1409 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1410
1411 /* Make sure hardware complete it */
1412 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1413 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1414
1f5b3c3f 1415 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1416 return 0;
1417}
1418
3460a6d9 1419
ba395927
KA
1420static int iommu_init_domains(struct intel_iommu *iommu)
1421{
1422 unsigned long ndomains;
1423 unsigned long nlongs;
1424
1425 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1426 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1427 iommu->seq_id, ndomains);
ba395927
KA
1428 nlongs = BITS_TO_LONGS(ndomains);
1429
94a91b50
DD
1430 spin_lock_init(&iommu->lock);
1431
ba395927
KA
1432 /* TBD: there might be 64K domains,
1433 * consider other allocation for future chip
1434 */
1435 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1436 if (!iommu->domain_ids) {
852bdb04
JL
1437 pr_err("IOMMU%d: allocating domain id array failed\n",
1438 iommu->seq_id);
ba395927
KA
1439 return -ENOMEM;
1440 }
1441 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1442 GFP_KERNEL);
1443 if (!iommu->domains) {
852bdb04
JL
1444 pr_err("IOMMU%d: allocating domain array failed\n",
1445 iommu->seq_id);
1446 kfree(iommu->domain_ids);
1447 iommu->domain_ids = NULL;
ba395927
KA
1448 return -ENOMEM;
1449 }
1450
1451 /*
1452 * if Caching mode is set, then invalid translations are tagged
 1453 * with domain id 0. Hence we need to pre-allocate it.
1454 */
1455 if (cap_caching_mode(iommu->cap))
1456 set_bit(0, iommu->domain_ids);
1457 return 0;
1458}
ba395927 1459
a868e6b7 1460static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1461{
1462 struct dmar_domain *domain;
5ced12af 1463 int i, count;
c7151a8d 1464 unsigned long flags;
ba395927 1465
94a91b50 1466 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1467 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1468 /*
1469 * Domain id 0 is reserved for invalid translation
1470 * if hardware supports caching mode.
1471 */
1472 if (cap_caching_mode(iommu->cap) && i == 0)
1473 continue;
1474
94a91b50
DD
1475 domain = iommu->domains[i];
1476 clear_bit(i, iommu->domain_ids);
1477
1478 spin_lock_irqsave(&domain->iommu_lock, flags);
5ced12af
JL
1479 count = --domain->iommu_count;
1480 spin_unlock_irqrestore(&domain->iommu_lock, flags);
92d03cc8
JL
1481 if (count == 0)
1482 domain_exit(domain);
5e98c4b1 1483 }
ba395927
KA
1484 }
1485
1486 if (iommu->gcmd & DMA_GCMD_TE)
1487 iommu_disable_translation(iommu);
1488
ba395927
KA
1489 kfree(iommu->domains);
1490 kfree(iommu->domain_ids);
a868e6b7
JL
1491 iommu->domains = NULL;
1492 iommu->domain_ids = NULL;
ba395927 1493
d9630fe9
WH
1494 g_iommus[iommu->seq_id] = NULL;
1495
ba395927
KA
1496 /* free context mapping */
1497 free_context_table(iommu);
ba395927
KA
1498}
1499
92d03cc8 1500static struct dmar_domain *alloc_domain(bool vm)
ba395927 1501{
92d03cc8
JL
1502 /* domain id for virtual machine, it won't be set in context */
1503 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1504 struct dmar_domain *domain;
ba395927
KA
1505
1506 domain = alloc_domain_mem();
1507 if (!domain)
1508 return NULL;
1509
4c923d47 1510 domain->nid = -1;
92d03cc8 1511 domain->iommu_count = 0;
1b198bb0 1512 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
2c2e2c38 1513 domain->flags = 0;
92d03cc8
JL
1514 spin_lock_init(&domain->iommu_lock);
1515 INIT_LIST_HEAD(&domain->devices);
1516 if (vm) {
1517 domain->id = atomic_inc_return(&vm_domid);
1518 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1519 }
2c2e2c38
FY
1520
1521 return domain;
1522}
1523
1524static int iommu_attach_domain(struct dmar_domain *domain,
1525 struct intel_iommu *iommu)
1526{
1527 int num;
1528 unsigned long ndomains;
1529 unsigned long flags;
1530
ba395927
KA
1531 ndomains = cap_ndoms(iommu->cap);
1532
1533 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1534
ba395927
KA
1535 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1536 if (num >= ndomains) {
1537 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1538 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1539 return -ENOMEM;
ba395927
KA
1540 }
1541
ba395927 1542 domain->id = num;
9ebd682e 1543 domain->iommu_count++;
2c2e2c38 1544 set_bit(num, iommu->domain_ids);
1b198bb0 1545 set_bit(iommu->seq_id, domain->iommu_bmp);
ba395927
KA
1546 iommu->domains[num] = domain;
1547 spin_unlock_irqrestore(&iommu->lock, flags);
1548
2c2e2c38 1549 return 0;
ba395927
KA
1550}
1551
2c2e2c38
FY
1552static void iommu_detach_domain(struct dmar_domain *domain,
1553 struct intel_iommu *iommu)
ba395927
KA
1554{
1555 unsigned long flags;
2c2e2c38 1556 int num, ndomains;
ba395927 1557
8c11e798 1558 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1559 ndomains = cap_ndoms(iommu->cap);
a45946ab 1560 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38 1561 if (iommu->domains[num] == domain) {
92d03cc8
JL
1562 clear_bit(num, iommu->domain_ids);
1563 iommu->domains[num] = NULL;
2c2e2c38
FY
1564 break;
1565 }
2c2e2c38 1566 }
8c11e798 1567 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1568}
1569
1570static struct iova_domain reserved_iova_list;
8a443df4 1571static struct lock_class_key reserved_rbtree_key;
ba395927 1572
51a63e67 1573static int dmar_init_reserved_ranges(void)
ba395927
KA
1574{
1575 struct pci_dev *pdev = NULL;
1576 struct iova *iova;
1577 int i;
ba395927 1578
f661197e 1579 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1580
8a443df4
MG
1581 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1582 &reserved_rbtree_key);
1583
ba395927
KA
1584 /* IOAPIC ranges shouldn't be accessed by DMA */
1585 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1586 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1587 if (!iova) {
ba395927 1588 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1589 return -ENODEV;
1590 }
ba395927
KA
1591
1592 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1593 for_each_pci_dev(pdev) {
1594 struct resource *r;
1595
1596 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1597 r = &pdev->resource[i];
1598 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1599 continue;
1a4a4551
DW
1600 iova = reserve_iova(&reserved_iova_list,
1601 IOVA_PFN(r->start),
1602 IOVA_PFN(r->end));
51a63e67 1603 if (!iova) {
ba395927 1604 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1605 return -ENODEV;
1606 }
ba395927
KA
1607 }
1608 }
51a63e67 1609 return 0;
ba395927
KA
1610}
1611
1612static void domain_reserve_special_ranges(struct dmar_domain *domain)
1613{
1614 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1615}
1616
1617static inline int guestwidth_to_adjustwidth(int gaw)
1618{
1619 int agaw;
1620 int r = (gaw - 12) % 9;
1621
1622 if (r == 0)
1623 agaw = gaw;
1624 else
1625 agaw = gaw + 9 - r;
1626 if (agaw > 64)
1627 agaw = 64;
1628 return agaw;
1629}
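/*
 * Examples (editorial): guest widths that already sit on a 9-bit level
 * boundary above the 12-bit page offset are kept (39 -> 39, 48 -> 48);
 * anything else is rounded up to the next boundary (e.g. 40 -> 48),
 * capped at 64.
 */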
1630
1631static int domain_init(struct dmar_domain *domain, int guest_width)
1632{
1633 struct intel_iommu *iommu;
1634 int adjust_width, agaw;
1635 unsigned long sagaw;
1636
f661197e 1637 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1638 domain_reserve_special_ranges(domain);
1639
1640 /* calculate AGAW */
8c11e798 1641 iommu = domain_get_iommu(domain);
ba395927
KA
1642 if (guest_width > cap_mgaw(iommu->cap))
1643 guest_width = cap_mgaw(iommu->cap);
1644 domain->gaw = guest_width;
1645 adjust_width = guestwidth_to_adjustwidth(guest_width);
1646 agaw = width_to_agaw(adjust_width);
1647 sagaw = cap_sagaw(iommu->cap);
1648 if (!test_bit(agaw, &sagaw)) {
1649 /* hardware doesn't support it, choose a bigger one */
1650 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1651 agaw = find_next_bit(&sagaw, 5, agaw);
1652 if (agaw >= 5)
1653 return -ENODEV;
1654 }
1655 domain->agaw = agaw;
ba395927 1656
8e604097
WH
1657 if (ecap_coherent(iommu->ecap))
1658 domain->iommu_coherency = 1;
1659 else
1660 domain->iommu_coherency = 0;
1661
58c610bd
SY
1662 if (ecap_sc_support(iommu->ecap))
1663 domain->iommu_snooping = 1;
1664 else
1665 domain->iommu_snooping = 0;
1666
214e39aa
DW
1667 if (intel_iommu_superpage)
1668 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1669 else
1670 domain->iommu_superpage = 0;
1671
4c923d47 1672 domain->nid = iommu->node;
c7151a8d 1673
ba395927 1674 /* always allocate the top pgd */
4c923d47 1675 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1676 if (!domain->pgd)
1677 return -ENOMEM;
5b6985ce 1678 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1679 return 0;
1680}
1681
1682static void domain_exit(struct dmar_domain *domain)
1683{
2c2e2c38
FY
1684 struct dmar_drhd_unit *drhd;
1685 struct intel_iommu *iommu;
ea8ea460 1686 struct page *freelist = NULL;
ba395927
KA
1687
1688 /* Domain 0 is reserved, so dont process it */
1689 if (!domain)
1690 return;
1691
7b668357
AW
1692 /* Flush any lazy unmaps that may reference this domain */
1693 if (!intel_iommu_strict)
1694 flush_unmaps_timeout(0);
1695
92d03cc8 1696 /* remove associated devices */
ba395927 1697 domain_remove_dev_info(domain);
92d03cc8 1698
ba395927
KA
1699 /* destroy iovas */
1700 put_iova_domain(&domain->iovad);
ba395927 1701
ea8ea460 1702 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1703
92d03cc8 1704 /* clear attached or cached domains */
0e242612 1705 rcu_read_lock();
2c2e2c38 1706 for_each_active_iommu(iommu, drhd)
92d03cc8
JL
1707 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1708 test_bit(iommu->seq_id, domain->iommu_bmp))
2c2e2c38 1709 iommu_detach_domain(domain, iommu);
0e242612 1710 rcu_read_unlock();
2c2e2c38 1711
ea8ea460
DW
1712 dma_free_pagelist(freelist);
1713
ba395927
KA
1714 free_domain_mem(domain);
1715}
1716
64ae892b
DW
1717static int domain_context_mapping_one(struct dmar_domain *domain,
1718 struct intel_iommu *iommu,
1719 u8 bus, u8 devfn, int translation)
ba395927
KA
1720{
1721 struct context_entry *context;
ba395927 1722 unsigned long flags;
ea6606b0
WH
1723 struct dma_pte *pgd;
1724 unsigned long num;
1725 unsigned long ndomains;
1726 int id;
1727 int agaw;
93a23a72 1728 struct device_domain_info *info = NULL;
ba395927
KA
1729
1730 pr_debug("Set context mapping for %02x:%02x.%d\n",
1731 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1732
ba395927 1733 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1734 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1735 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1736
ba395927
KA
1737 context = device_to_context_entry(iommu, bus, devfn);
1738 if (!context)
1739 return -ENOMEM;
1740 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1741 if (context_present(context)) {
ba395927
KA
1742 spin_unlock_irqrestore(&iommu->lock, flags);
1743 return 0;
1744 }
1745
ea6606b0
WH
1746 id = domain->id;
1747 pgd = domain->pgd;
1748
2c2e2c38
FY
1749 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1750 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1751 int found = 0;
1752
1753 /* find an available domain id for this device in iommu */
1754 ndomains = cap_ndoms(iommu->cap);
a45946ab 1755 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1756 if (iommu->domains[num] == domain) {
1757 id = num;
1758 found = 1;
1759 break;
1760 }
ea6606b0
WH
1761 }
1762
1763 if (found == 0) {
1764 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1765 if (num >= ndomains) {
1766 spin_unlock_irqrestore(&iommu->lock, flags);
1767 printk(KERN_ERR "IOMMU: no free domain ids\n");
1768 return -EFAULT;
1769 }
1770
1771 set_bit(num, iommu->domain_ids);
1772 iommu->domains[num] = domain;
1773 id = num;
1774 }
1775
1776 /* Skip top levels of page tables for
1777 * an iommu which has less agaw than the default.
1672af11 1778 * Unnecessary for PT mode.
ea6606b0 1779 */
1672af11
CW
1780 if (translation != CONTEXT_TT_PASS_THROUGH) {
1781 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1782 pgd = phys_to_virt(dma_pte_addr(pgd));
1783 if (!dma_pte_present(pgd)) {
1784 spin_unlock_irqrestore(&iommu->lock, flags);
1785 return -ENOMEM;
1786 }
ea6606b0
WH
1787 }
1788 }
1789 }
1790
1791 context_set_domain_id(context, id);
4ed0d3e6 1792
93a23a72 1793 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1794 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1795 translation = info ? CONTEXT_TT_DEV_IOTLB :
1796 CONTEXT_TT_MULTI_LEVEL;
1797 }
4ed0d3e6
FY
1798 /*
1799 * In pass through mode, AW must be programmed to indicate the largest
1800 * AGAW value supported by hardware. And ASR is ignored by hardware.
1801 */
93a23a72 1802 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1803 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1804 else {
1805 context_set_address_root(context, virt_to_phys(pgd));
1806 context_set_address_width(context, iommu->agaw);
1807 }
4ed0d3e6
FY
1808
1809 context_set_translation_type(context, translation);
c07e7d21
MM
1810 context_set_fault_enable(context);
1811 context_set_present(context);
5331fe6f 1812 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1813
4c25a2c1
DW
1814 /*
1815 * It's a non-present to present mapping. If hardware doesn't cache
1816 * non-present entries we only need to flush the write-buffer. If it
1817 * _does_ cache non-present entries, then it does so in the special
1818 * domain #0, which we have to flush:
1819 */
1820 if (cap_caching_mode(iommu->cap)) {
1821 iommu->flush.flush_context(iommu, 0,
1822 (((u16)bus) << 8) | devfn,
1823 DMA_CCMD_MASK_NOBIT,
1824 DMA_CCMD_DEVICE_INVL);
82653633 1825 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1826 } else {
ba395927 1827 iommu_flush_write_buffer(iommu);
4c25a2c1 1828 }
93a23a72 1829 iommu_enable_dev_iotlb(info);
ba395927 1830 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1831
1832 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1833 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1834 domain->iommu_count++;
4c923d47
SS
1835 if (domain->iommu_count == 1)
1836 domain->nid = iommu->node;
58c610bd 1837 domain_update_iommu_cap(domain);
c7151a8d
WH
1838 }
1839 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1840 return 0;
1841}
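/*
 * Illustrative note (hypothetical device): the source-id handed to
 * flush_context() above is (bus << 8) | devfn, so for a device at
 * 00:1f.3 it is (0x00 << 8) | 0xfb == 0x00fb, since devfn packs the
 * slot in bits 7:3 and the function in bits 2:0.
 */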
1842
1843static int
e1f167f3
DW
1844domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1845 int translation)
ba395927
KA
1846{
1847 int ret;
e1f167f3 1848 struct pci_dev *pdev, *tmp, *parent;
64ae892b 1849 struct intel_iommu *iommu;
156baca8 1850 u8 bus, devfn;
64ae892b 1851
e1f167f3 1852 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1853 if (!iommu)
1854 return -ENODEV;
ba395927 1855
156baca8 1856 ret = domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1857 translation);
e1f167f3 1858 if (ret || !dev_is_pci(dev))
ba395927
KA
1859 return ret;
1860
1861 /* dependent device mapping */
e1f167f3 1862 pdev = to_pci_dev(dev);
ba395927
KA
1863 tmp = pci_find_upstream_pcie_bridge(pdev);
1864 if (!tmp)
1865 return 0;
1866 /* Secondary interface's bus number and devfn 0 */
1867 parent = pdev->bus->self;
1868 while (parent != tmp) {
64ae892b 1869 ret = domain_context_mapping_one(domain, iommu,
276dbf99 1870 parent->bus->number,
4ed0d3e6 1871 parent->devfn, translation);
ba395927
KA
1872 if (ret)
1873 return ret;
1874 parent = parent->bus->self;
1875 }
45e829ea 1876 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
64ae892b 1877 return domain_context_mapping_one(domain, iommu,
4ed0d3e6
FY
1878 tmp->subordinate->number, 0,
1879 translation);
ba395927 1880 else /* this is a legacy PCI bridge */
64ae892b 1881 return domain_context_mapping_one(domain, iommu,
276dbf99 1882 tmp->bus->number,
4ed0d3e6
FY
1883 tmp->devfn,
1884 translation);
ba395927
KA
1885}
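/*
 * Illustrative example (hypothetical topology): a conventional PCI
 * device at 05:03.0 behind a PCIe-to-PCI bridge whose secondary bus
 * is 5 gets context entries for 05:03.0 itself, for any PCI bridges
 * between it and the PCIe bridge, and for (bus 5, devfn 0), since the
 * PCIe-to-PCI bridge may tag the device's DMA with its secondary bus
 * number and devfn 0.
 */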
1886
e1f167f3 1887static int domain_context_mapped(struct device *dev)
ba395927
KA
1888{
1889 int ret;
e1f167f3 1890 struct pci_dev *pdev, *tmp, *parent;
5331fe6f 1891 struct intel_iommu *iommu;
156baca8 1892 u8 bus, devfn;
5331fe6f 1893
e1f167f3 1894 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1895 if (!iommu)
1896 return -ENODEV;
ba395927 1897
156baca8 1898 ret = device_context_mapped(iommu, bus, devfn);
e1f167f3 1899 if (!ret || !dev_is_pci(dev))
ba395927 1900 return ret;
e1f167f3 1901
ba395927 1902 /* dependent device mapping */
e1f167f3 1903 pdev = to_pci_dev(dev);
ba395927
KA
1904 tmp = pci_find_upstream_pcie_bridge(pdev);
1905 if (!tmp)
1906 return ret;
1907 /* Secondary interface's bus number and devfn 0 */
1908 parent = pdev->bus->self;
1909 while (parent != tmp) {
8c11e798 1910 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1911 parent->devfn);
ba395927
KA
1912 if (!ret)
1913 return ret;
1914 parent = parent->bus->self;
1915 }
5f4d91a1 1916 if (pci_is_pcie(tmp))
276dbf99
DW
1917 return device_context_mapped(iommu, tmp->subordinate->number,
1918 0);
ba395927 1919 else
276dbf99
DW
1920 return device_context_mapped(iommu, tmp->bus->number,
1921 tmp->devfn);
ba395927
KA
1922}
1923
f532959b
FY
1924/* Returns a number of VTD pages, but aligned to MM page size */
1925static inline unsigned long aligned_nrpages(unsigned long host_addr,
1926 size_t size)
1927{
1928 host_addr &= ~PAGE_MASK;
1929 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1930}
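/*
 * Illustrative example (assuming 4KiB MM and VTD pages, i.e.
 * PAGE_SHIFT == VTD_PAGE_SHIFT == 12): aligned_nrpages(0x10000800,
 * 0x1000) keeps the in-page offset 0x800, rounds 0x800 + 0x1000 up
 * to 0x2000 and returns 2 -- a 4KiB buffer that straddles a page
 * boundary needs two VTD pages.
 */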
1931
6dd9a7c7
YS
1932/* Return largest possible superpage level for a given mapping */
1933static inline int hardware_largepage_caps(struct dmar_domain *domain,
1934 unsigned long iov_pfn,
1935 unsigned long phy_pfn,
1936 unsigned long pages)
1937{
1938 int support, level = 1;
1939 unsigned long pfnmerge;
1940
1941 support = domain->iommu_superpage;
1942
1943 /* To use a large page, the virtual *and* physical addresses
1944 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1945 of them will mean we have to use smaller pages. So just
1946 merge them and check both at once. */
1947 pfnmerge = iov_pfn | phy_pfn;
1948
1949 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1950 pages >>= VTD_STRIDE_SHIFT;
1951 if (!pages)
1952 break;
1953 pfnmerge >>= VTD_STRIDE_SHIFT;
1954 level++;
1955 support--;
1956 }
1957 return level;
1958}
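/*
 * Illustrative example (assuming VTD_STRIDE_SHIFT == 9, i.e. 512
 * entries per table level and 4KiB base pages): a 2MiB superpage
 * (level 2) is only usable when the low 9 bits of both iov_pfn and
 * phy_pfn are zero and at least 512 pages remain to be mapped;
 * merging the two PFNs with '|' lets both alignments be checked in a
 * single test.
 */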
1959
9051aa02
DW
1960static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1961 struct scatterlist *sg, unsigned long phys_pfn,
1962 unsigned long nr_pages, int prot)
e1605495
DW
1963{
1964 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1965 phys_addr_t uninitialized_var(pteval);
e1605495 1966 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1967 unsigned long sg_res;
6dd9a7c7
YS
1968 unsigned int largepage_lvl = 0;
1969 unsigned long lvl_pages = 0;
e1605495
DW
1970
1971 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1972
1973 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1974 return -EINVAL;
1975
1976 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1977
9051aa02
DW
1978 if (sg)
1979 sg_res = 0;
1980 else {
1981 sg_res = nr_pages + 1;
1982 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1983 }
1984
6dd9a7c7 1985 while (nr_pages > 0) {
c85994e4
DW
1986 uint64_t tmp;
1987
e1605495 1988 if (!sg_res) {
f532959b 1989 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1990 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1991 sg->dma_length = sg->length;
1992 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1993 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1994 }
6dd9a7c7 1995
e1605495 1996 if (!pte) {
6dd9a7c7
YS
1997 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1998
5cf0a76f 1999 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2000 if (!pte)
2001 return -ENOMEM;
6dd9a7c7 2002 /* It is a large page */
6491d4d0 2003 if (largepage_lvl > 1) {
6dd9a7c7 2004 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
2005 /* Ensure that old small page tables are removed to make room
2006 for superpage, if they exist. */
2007 dma_pte_clear_range(domain, iov_pfn,
2008 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2009 dma_pte_free_pagetable(domain, iov_pfn,
2010 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2011 } else {
6dd9a7c7 2012 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2013 }
6dd9a7c7 2014
e1605495
DW
2015 }
2016 /* We don't need a lock here; nobody else
2017 * touches the iova range
2018 */
7766a3fb 2019 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2020 if (tmp) {
1bf20f0d 2021 static int dumps = 5;
c85994e4
DW
2022 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2023 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2024 if (dumps) {
2025 dumps--;
2026 debug_dma_dump_mappings(NULL);
2027 }
2028 WARN_ON(1);
2029 }
6dd9a7c7
YS
2030
2031 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2032
2033 BUG_ON(nr_pages < lvl_pages);
2034 BUG_ON(sg_res < lvl_pages);
2035
2036 nr_pages -= lvl_pages;
2037 iov_pfn += lvl_pages;
2038 phys_pfn += lvl_pages;
2039 pteval += lvl_pages * VTD_PAGE_SIZE;
2040 sg_res -= lvl_pages;
2041
2042 /* If the next PTE would be the first in a new page, then we
2043 need to flush the cache on the entries we've just written.
2044 And then we'll need to recalculate 'pte', so clear it and
2045 let it get set again in the if (!pte) block above.
2046
2047 If we're done (!nr_pages) we need to flush the cache too.
2048
2049 Also if we've been setting superpages, we may need to
2050 recalculate 'pte' and switch back to smaller pages for the
2051 end of the mapping, if the trailing size is not enough to
2052 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2053 pte++;
6dd9a7c7
YS
2054 if (!nr_pages || first_pte_in_page(pte) ||
2055 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2056 domain_flush_cache(domain, first_pte,
2057 (void *)pte - (void *)first_pte);
2058 pte = NULL;
2059 }
6dd9a7c7
YS
2060
2061 if (!sg_res && nr_pages)
e1605495
DW
2062 sg = sg_next(sg);
2063 }
2064 return 0;
2065}
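/*
 * Illustrative note on the flush logic above (assuming 4KiB
 * page-table pages holding 512 8-byte PTEs): mapping 1024 small pages
 * starting at a 512-page-aligned IOVA writes 512 PTEs, hits
 * first_pte_in_page() as the walk crosses into the next page-table
 * page, flushes that full page of PTEs, and then repeats for the
 * remaining 512 entries.
 */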
2066
9051aa02
DW
2067static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2068 struct scatterlist *sg, unsigned long nr_pages,
2069 int prot)
ba395927 2070{
9051aa02
DW
2071 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2072}
6f6a00e4 2073
9051aa02
DW
2074static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2075 unsigned long phys_pfn, unsigned long nr_pages,
2076 int prot)
2077{
2078 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2079}
2080
c7151a8d 2081static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2082{
c7151a8d
WH
2083 if (!iommu)
2084 return;
8c11e798
WH
2085
2086 clear_context_table(iommu, bus, devfn);
2087 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2088 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2089 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2090}
2091
109b9b04
DW
2092static inline void unlink_domain_info(struct device_domain_info *info)
2093{
2094 assert_spin_locked(&device_domain_lock);
2095 list_del(&info->link);
2096 list_del(&info->global);
2097 if (info->dev)
0bcb3e28 2098 info->dev->archdata.iommu = NULL;
109b9b04
DW
2099}
2100
ba395927
KA
2101static void domain_remove_dev_info(struct dmar_domain *domain)
2102{
3a74ca01 2103 struct device_domain_info *info, *tmp;
92d03cc8 2104 unsigned long flags, flags2;
ba395927
KA
2105
2106 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2107 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2108 unlink_domain_info(info);
ba395927
KA
2109 spin_unlock_irqrestore(&device_domain_lock, flags);
2110
93a23a72 2111 iommu_disable_dev_iotlb(info);
7c7faa11 2112 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2113
92d03cc8 2114 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
7c7faa11 2115 iommu_detach_dependent_devices(info->iommu, info->dev);
92d03cc8
JL
2116 /* clear this iommu in iommu_bmp, update iommu count
2117 * and capabilities
2118 */
2119 spin_lock_irqsave(&domain->iommu_lock, flags2);
7c7faa11 2120 if (test_and_clear_bit(info->iommu->seq_id,
92d03cc8
JL
2121 domain->iommu_bmp)) {
2122 domain->iommu_count--;
2123 domain_update_iommu_cap(domain);
2124 }
2125 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2126 }
2127
2128 free_devinfo_mem(info);
ba395927
KA
2129 spin_lock_irqsave(&device_domain_lock, flags);
2130 }
2131 spin_unlock_irqrestore(&device_domain_lock, flags);
2132}
2133
2134/*
2135 * find_domain
1525a29a 2136 * Note: we use struct device->archdata.iommu to store the info
ba395927 2137 */
1525a29a 2138static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2139{
2140 struct device_domain_info *info;
2141
2142 /* No lock here, assumes no domain exit in normal case */
1525a29a 2143 info = dev->archdata.iommu;
ba395927
KA
2144 if (info)
2145 return info->domain;
2146 return NULL;
2147}
2148
5a8f40e8 2149static inline struct device_domain_info *
745f2586
JL
2150dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2151{
2152 struct device_domain_info *info;
2153
2154 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2155 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2156 info->devfn == devfn)
5a8f40e8 2157 return info;
745f2586
JL
2158
2159 return NULL;
2160}
2161
5a8f40e8 2162static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2163 int bus, int devfn,
b718cd3d
DW
2164 struct device *dev,
2165 struct dmar_domain *domain)
745f2586 2166{
5a8f40e8 2167 struct dmar_domain *found = NULL;
745f2586
JL
2168 struct device_domain_info *info;
2169 unsigned long flags;
2170
2171 info = alloc_devinfo_mem();
2172 if (!info)
b718cd3d 2173 return NULL;
745f2586 2174
745f2586
JL
2175 info->bus = bus;
2176 info->devfn = devfn;
2177 info->dev = dev;
2178 info->domain = domain;
5a8f40e8 2179 info->iommu = iommu;
745f2586
JL
2180 if (!dev)
2181 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2182
2183 spin_lock_irqsave(&device_domain_lock, flags);
2184 if (dev)
0bcb3e28 2185 found = find_domain(dev);
5a8f40e8
DW
2186 else {
2187 struct device_domain_info *info2;
41e80dca 2188 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2189 if (info2)
2190 found = info2->domain;
2191 }
745f2586
JL
2192 if (found) {
2193 spin_unlock_irqrestore(&device_domain_lock, flags);
2194 free_devinfo_mem(info);
b718cd3d
DW
2195 /* Caller must free the original domain */
2196 return found;
745f2586
JL
2197 }
2198
b718cd3d
DW
2199 list_add(&info->link, &domain->devices);
2200 list_add(&info->global, &device_domain_list);
2201 if (dev)
2202 dev->archdata.iommu = info;
2203 spin_unlock_irqrestore(&device_domain_lock, flags);
2204
2205 return domain;
745f2586
JL
2206}
2207
ba395927 2208/* domain is initialized */
146922ec 2209static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2210{
e85bb5d4 2211 struct dmar_domain *domain, *free = NULL;
5a8f40e8
DW
2212 struct intel_iommu *iommu = NULL;
2213 struct device_domain_info *info;
146922ec 2214 struct pci_dev *dev_tmp = NULL;
ba395927 2215 unsigned long flags;
146922ec 2216 u8 bus, devfn, bridge_bus, bridge_devfn;
ba395927 2217
146922ec 2218 domain = find_domain(dev);
ba395927
KA
2219 if (domain)
2220 return domain;
2221
146922ec
DW
2222 if (dev_is_pci(dev)) {
2223 struct pci_dev *pdev = to_pci_dev(dev);
2224 u16 segment;
276dbf99 2225
146922ec
DW
2226 segment = pci_domain_nr(pdev->bus);
2227 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2228 if (dev_tmp) {
2229 if (pci_is_pcie(dev_tmp)) {
2230 bridge_bus = dev_tmp->subordinate->number;
2231 bridge_devfn = 0;
2232 } else {
2233 bridge_bus = dev_tmp->bus->number;
2234 bridge_devfn = dev_tmp->devfn;
2235 }
2236 spin_lock_irqsave(&device_domain_lock, flags);
9f05d3fb
DW
2237 info = dmar_search_domain_by_dev_info(segment,
2238 bridge_bus,
2239 bridge_devfn);
146922ec
DW
2240 if (info) {
2241 iommu = info->iommu;
2242 domain = info->domain;
2243 }
2244 spin_unlock_irqrestore(&device_domain_lock, flags);
2245 /* pcie-pci bridge already has a domain, uses it */
2246 if (info)
2247 goto found_domain;
5a8f40e8 2248 }
ba395927
KA
2249 }
2250
146922ec
DW
2251 iommu = device_to_iommu(dev, &bus, &devfn);
2252 if (!iommu)
2253 goto error;
ba395927 2254
146922ec 2255 /* Allocate and initialize new domain for the device */
92d03cc8 2256 domain = alloc_domain(false);
745f2586
JL
2257 if (!domain)
2258 goto error;
2259 if (iommu_attach_domain(domain, iommu)) {
2fe9723d 2260 free_domain_mem(domain);
14d40569 2261 domain = NULL;
ba395927 2262 goto error;
2c2e2c38 2263 }
e85bb5d4
JL
2264 free = domain;
2265 if (domain_init(domain, gaw))
ba395927 2266 goto error;
ba395927
KA
2267
2268 /* register pcie-to-pci device */
2269 if (dev_tmp) {
146922ec
DW
2270 domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
2271 NULL, domain);
b718cd3d 2272 if (!domain)
ba395927 2273 goto error;
ba395927
KA
2274 }
2275
2276found_domain:
146922ec 2277 domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
ba395927 2278error:
b718cd3d 2279 if (free != domain)
e85bb5d4 2280 domain_exit(free);
b718cd3d
DW
2281
2282 return domain;
ba395927
KA
2283}
2284
2c2e2c38 2285static int iommu_identity_mapping;
e0fc7e0b
DW
2286#define IDENTMAP_ALL 1
2287#define IDENTMAP_GFX 2
2288#define IDENTMAP_AZALIA 4
2c2e2c38 2289
b213203e
DW
2290static int iommu_domain_identity_map(struct dmar_domain *domain,
2291 unsigned long long start,
2292 unsigned long long end)
ba395927 2293{
c5395d5c
DW
2294 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2295 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2296
2297 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2298 dma_to_mm_pfn(last_vpfn))) {
ba395927 2299 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2300 return -ENOMEM;
ba395927
KA
2301 }
2302
c5395d5c
DW
2303 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2304 start, end, domain->id);
ba395927
KA
2305 /*
2306 * RMRR range might have overlap with physical memory range,
2307 * clear it first
2308 */
c5395d5c 2309 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2310
c5395d5c
DW
2311 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2312 last_vpfn - first_vpfn + 1,
61df7443 2313 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2314}
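/*
 * Illustrative example (hypothetical RMRR): a reserved range
 * 0xe8000000 - 0xe80fffff becomes first_vpfn = 0xe8000 and
 * last_vpfn = 0xe80ff (with 4KiB VTD pages), so 0x100 pages are
 * reserved in the iova allocator and then identity-mapped 1:1.
 */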
2315
0b9d9753 2316static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2317 unsigned long long start,
2318 unsigned long long end)
2319{
2320 struct dmar_domain *domain;
2321 int ret;
2322
0b9d9753 2323 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2324 if (!domain)
2325 return -ENOMEM;
2326
19943b0e
DW
2327 /* For _hardware_ passthrough, don't bother. But for software
2328 passthrough, we do it anyway -- it may indicate a memory
2329 range which is reserved in E820 and so didn't get set
2330 up to start with in si_domain */
2331 if (domain == si_domain && hw_pass_through) {
2332 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2333 dev_name(dev), start, end);
19943b0e
DW
2334 return 0;
2335 }
2336
2337 printk(KERN_INFO
2338 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2339 dev_name(dev), start, end);
2ff729f5 2340
5595b528
DW
2341 if (end < start) {
2342 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2343 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2344 dmi_get_system_info(DMI_BIOS_VENDOR),
2345 dmi_get_system_info(DMI_BIOS_VERSION),
2346 dmi_get_system_info(DMI_PRODUCT_VERSION));
2347 ret = -EIO;
2348 goto error;
2349 }
2350
2ff729f5
DW
2351 if (end >> agaw_to_width(domain->agaw)) {
2352 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2353 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2354 agaw_to_width(domain->agaw),
2355 dmi_get_system_info(DMI_BIOS_VENDOR),
2356 dmi_get_system_info(DMI_BIOS_VERSION),
2357 dmi_get_system_info(DMI_PRODUCT_VERSION));
2358 ret = -EIO;
2359 goto error;
2360 }
19943b0e 2361
b213203e 2362 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2363 if (ret)
2364 goto error;
2365
2366 /* context entry init */
0b9d9753 2367 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2368 if (ret)
2369 goto error;
2370
2371 return 0;
2372
2373 error:
ba395927
KA
2374 domain_exit(domain);
2375 return ret;
ba395927
KA
2376}
2377
2378static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2379 struct device *dev)
ba395927 2380{
0b9d9753 2381 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2382 return 0;
0b9d9753
DW
2383 return iommu_prepare_identity_map(dev, rmrr->base_address,
2384 rmrr->end_address);
ba395927
KA
2385}
2386
d3f13810 2387#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2388static inline void iommu_prepare_isa(void)
2389{
2390 struct pci_dev *pdev;
2391 int ret;
2392
2393 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2394 if (!pdev)
2395 return;
2396
c7ab48d2 2397 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2398 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2399
2400 if (ret)
c7ab48d2
DW
2401 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2402 "floppy might not work\n");
49a0429e 2403
9b27e82d 2404 pci_dev_put(pdev);
49a0429e
KA
2405}
2406#else
2407static inline void iommu_prepare_isa(void)
2408{
2409 return;
2410}
d3f13810 2411#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2412
2c2e2c38 2413static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2414
071e1374 2415static int __init si_domain_init(int hw)
2c2e2c38
FY
2416{
2417 struct dmar_drhd_unit *drhd;
2418 struct intel_iommu *iommu;
c7ab48d2 2419 int nid, ret = 0;
2c2e2c38 2420
92d03cc8 2421 si_domain = alloc_domain(false);
2c2e2c38
FY
2422 if (!si_domain)
2423 return -EFAULT;
2424
92d03cc8
JL
2425 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2426
2c2e2c38
FY
2427 for_each_active_iommu(iommu, drhd) {
2428 ret = iommu_attach_domain(si_domain, iommu);
2429 if (ret) {
2430 domain_exit(si_domain);
2431 return -EFAULT;
2432 }
2433 }
2434
2435 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2436 domain_exit(si_domain);
2437 return -EFAULT;
2438 }
2439
9544c003
JL
2440 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2441 si_domain->id);
2c2e2c38 2442
19943b0e
DW
2443 if (hw)
2444 return 0;
2445
c7ab48d2 2446 for_each_online_node(nid) {
5dfe8660
TH
2447 unsigned long start_pfn, end_pfn;
2448 int i;
2449
2450 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2451 ret = iommu_domain_identity_map(si_domain,
2452 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2453 if (ret)
2454 return ret;
2455 }
c7ab48d2
DW
2456 }
2457
2c2e2c38
FY
2458 return 0;
2459}
2460
9b226624 2461static int identity_mapping(struct device *dev)
2c2e2c38
FY
2462{
2463 struct device_domain_info *info;
2464
2465 if (likely(!iommu_identity_mapping))
2466 return 0;
2467
9b226624 2468 info = dev->archdata.iommu;
cb452a40
MT
2469 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2470 return (info->domain == si_domain);
2c2e2c38 2471
2c2e2c38
FY
2472 return 0;
2473}
2474
2475static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2476 struct device *dev, int translation)
2c2e2c38 2477{
0ac72664 2478 struct dmar_domain *ndomain;
5a8f40e8 2479 struct intel_iommu *iommu;
156baca8 2480 u8 bus, devfn;
5fe60f4e 2481 int ret;
2c2e2c38 2482
5913c9bf 2483 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2484 if (!iommu)
2485 return -ENODEV;
2486
5913c9bf 2487 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2488 if (ndomain != domain)
2489 return -EBUSY;
2c2e2c38 2490
5913c9bf 2491 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2492 if (ret) {
5913c9bf 2493 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2494 return ret;
2495 }
2496
2c2e2c38
FY
2497 return 0;
2498}
2499
0b9d9753 2500static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2501{
2502 struct dmar_rmrr_unit *rmrr;
832bd858 2503 struct device *tmp;
ea2447f7
TM
2504 int i;
2505
0e242612 2506 rcu_read_lock();
ea2447f7 2507 for_each_rmrr_units(rmrr) {
b683b230
JL
2508 /*
2509 * Return TRUE if this RMRR contains the device that
2510 * is passed in.
2511 */
2512 for_each_active_dev_scope(rmrr->devices,
2513 rmrr->devices_cnt, i, tmp)
0b9d9753 2514 if (tmp == dev) {
0e242612 2515 rcu_read_unlock();
ea2447f7 2516 return true;
b683b230 2517 }
ea2447f7 2518 }
0e242612 2519 rcu_read_unlock();
ea2447f7
TM
2520 return false;
2521}
2522
3bdb2591 2523static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2524{
ea2447f7 2525
3bdb2591
DW
2526 if (dev_is_pci(dev)) {
2527 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2528
3bdb2591
DW
2529 /*
2530 * We want to prevent any device associated with an RMRR from
2531 * getting placed into the SI Domain. This is done because
2532 * problems exist when devices are moved in and out of domains
2533 * and their respective RMRR info is lost. We exempt USB devices
2534 * from this process due to their usage of RMRRs that are known
2535 * to not be needed after BIOS hand-off to OS.
2536 */
2537 if (device_has_rmrr(dev) &&
2538 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2539 return 0;
e0fc7e0b 2540
3bdb2591
DW
2541 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2542 return 1;
e0fc7e0b 2543
3bdb2591
DW
2544 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2545 return 1;
6941af28 2546
3bdb2591 2547 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2548 return 0;
3bdb2591
DW
2549
2550 /*
2551 * We want to start off with all devices in the 1:1 domain, and
2552 * take them out later if we find they can't access all of memory.
2553 *
2554 * However, we can't do this for PCI devices behind bridges,
2555 * because all PCI devices behind the same bridge will end up
2556 * with the same source-id on their transactions.
2557 *
2558 * Practically speaking, we can't change things around for these
2559 * devices at run-time, because we can't be sure there'll be no
2560 * DMA transactions in flight for any of their siblings.
2561 *
2562 * So PCI devices (unless they're on the root bus) as well as
2563 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2564 * the 1:1 domain, just in _case_ one of their siblings turns out
2565 * not to be able to map all of memory.
2566 */
2567 if (!pci_is_pcie(pdev)) {
2568 if (!pci_is_root_bus(pdev->bus))
2569 return 0;
2570 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2571 return 0;
2572 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2573 return 0;
3bdb2591
DW
2574 } else {
2575 if (device_has_rmrr(dev))
2576 return 0;
2577 }
3dfc813d 2578
3bdb2591 2579 /*
3dfc813d 2580 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2581 * Assume that they will -- if they turn out not to be, then we can
3dfc813d
DW
2582 * take them out of the 1:1 domain later.
2583 */
8fcc5372
CW
2584 if (!startup) {
2585 /*
2586 * If the device's dma_mask is less than the system's memory
2587 * size then this is not a candidate for identity mapping.
2588 */
3bdb2591 2589 u64 dma_mask = *dev->dma_mask;
8fcc5372 2590
3bdb2591
DW
2591 if (dev->coherent_dma_mask &&
2592 dev->coherent_dma_mask < dma_mask)
2593 dma_mask = dev->coherent_dma_mask;
8fcc5372 2594
3bdb2591 2595 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2596 }
6941af28
DW
2597
2598 return 1;
2599}
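/*
 * Illustrative consequence of the run-time check above (hypothetical
 * device): on a box with 8GiB of RAM, a device whose dma_mask only
 * covers 32 bits fails dma_mask >= dma_get_required_mask(dev) and is
 * therefore not left in the identity-mapped (1:1) domain.
 */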
2600
cf04eee8
DW
2601static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2602{
2603 int ret;
2604
2605 if (!iommu_should_identity_map(dev, 1))
2606 return 0;
2607
2608 ret = domain_add_dev_info(si_domain, dev,
2609 hw ? CONTEXT_TT_PASS_THROUGH :
2610 CONTEXT_TT_MULTI_LEVEL);
2611 if (!ret)
2612 pr_info("IOMMU: %s identity mapping for device %s\n",
2613 hw ? "hardware" : "software", dev_name(dev));
2614 else if (ret == -ENODEV)
2615 /* device not associated with an iommu */
2616 ret = 0;
2617
2618 return ret;
2619}
2620
2621
071e1374 2622static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2623{
2c2e2c38 2624 struct pci_dev *pdev = NULL;
cf04eee8
DW
2625 struct dmar_drhd_unit *drhd;
2626 struct intel_iommu *iommu;
2627 struct device *dev;
2628 int i;
2629 int ret = 0;
2c2e2c38 2630
19943b0e 2631 ret = si_domain_init(hw);
2c2e2c38
FY
2632 if (ret)
2633 return -EFAULT;
2634
2c2e2c38 2635 for_each_pci_dev(pdev) {
cf04eee8
DW
2636 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2637 if (ret)
2638 return ret;
2639 }
2640
2641 for_each_active_iommu(iommu, drhd)
2642 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2643 struct acpi_device_physical_node *pn;
2644 struct acpi_device *adev;
2645
2646 if (dev->bus != &acpi_bus_type)
2647 continue;
2648
2649 adev = to_acpi_device(dev);
2650 mutex_lock(&adev->physical_node_lock);
2651 list_for_each_entry(pn, &adev->physical_node_list, node) {
2652 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2653 if (ret)
2654 break;
eae460b6 2655 }
cf04eee8
DW
2656 mutex_unlock(&adev->physical_node_lock);
2657 if (ret)
2658 return ret;
62edf5dc 2659 }
2c2e2c38
FY
2660
2661 return 0;
2662}
2663
b779260b 2664static int __init init_dmars(void)
ba395927
KA
2665{
2666 struct dmar_drhd_unit *drhd;
2667 struct dmar_rmrr_unit *rmrr;
832bd858 2668 struct device *dev;
ba395927 2669 struct intel_iommu *iommu;
9d783ba0 2670 int i, ret;
2c2e2c38 2671
ba395927
KA
2672 /*
2673 * for each drhd
2674 * allocate root
2675 * initialize and program root entry to not present
2676 * endfor
2677 */
2678 for_each_drhd_unit(drhd) {
5e0d2a6f 2679 /*
2680 * lock not needed as this is only incremented in the single-
2681 * threaded kernel __init code path; all other accesses are
2682 * read only
2683 */
1b198bb0
MT
2684 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2685 g_num_of_iommus++;
2686 continue;
2687 }
2688 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2689 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2690 }
2691
d9630fe9
WH
2692 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2693 GFP_KERNEL);
2694 if (!g_iommus) {
2695 printk(KERN_ERR "Allocating global iommu array failed\n");
2696 ret = -ENOMEM;
2697 goto error;
2698 }
2699
80b20dd8 2700 deferred_flush = kzalloc(g_num_of_iommus *
2701 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2702 if (!deferred_flush) {
5e0d2a6f 2703 ret = -ENOMEM;
989d51fc 2704 goto free_g_iommus;
5e0d2a6f 2705 }
2706
7c919779 2707 for_each_active_iommu(iommu, drhd) {
d9630fe9 2708 g_iommus[iommu->seq_id] = iommu;
ba395927 2709
e61d98d8
SS
2710 ret = iommu_init_domains(iommu);
2711 if (ret)
989d51fc 2712 goto free_iommu;
e61d98d8 2713
ba395927
KA
2714 /*
2715 * TBD:
2716 * we could share the same root & context tables
25985edc 2717 * among all IOMMUs. Need to split it later.
ba395927
KA
2718 */
2719 ret = iommu_alloc_root_entry(iommu);
2720 if (ret) {
2721 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2722 goto free_iommu;
ba395927 2723 }
4ed0d3e6 2724 if (!ecap_pass_through(iommu->ecap))
19943b0e 2725 hw_pass_through = 0;
ba395927
KA
2726 }
2727
1531a6a6
SS
2728 /*
2729 * Start from the sane iommu hardware state.
2730 */
7c919779 2731 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2732 /*
2733 * If the queued invalidation is already initialized by us
2734 * (for example, while enabling interrupt-remapping) then
2735 * we got the things already rolling from a sane state.
2736 */
2737 if (iommu->qi)
2738 continue;
2739
2740 /*
2741 * Clear any previous faults.
2742 */
2743 dmar_fault(-1, iommu);
2744 /*
2745 * Disable queued invalidation if supported and already enabled
2746 * before OS handover.
2747 */
2748 dmar_disable_qi(iommu);
2749 }
2750
7c919779 2751 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2752 if (dmar_enable_qi(iommu)) {
2753 /*
2754 * Queued Invalidate not enabled, use Register Based
2755 * Invalidate
2756 */
2757 iommu->flush.flush_context = __iommu_flush_context;
2758 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2759 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2760 "invalidation\n",
680a7524 2761 iommu->seq_id,
b4e0f9eb 2762 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2763 } else {
2764 iommu->flush.flush_context = qi_flush_context;
2765 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2766 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2767 "invalidation\n",
680a7524 2768 iommu->seq_id,
b4e0f9eb 2769 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2770 }
2771 }
2772
19943b0e 2773 if (iommu_pass_through)
e0fc7e0b
DW
2774 iommu_identity_mapping |= IDENTMAP_ALL;
2775
d3f13810 2776#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2777 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2778#endif
e0fc7e0b
DW
2779
2780 check_tylersburg_isoch();
2781
ba395927 2782 /*
19943b0e
DW
2783 * If pass through is not set or not enabled, set up context entries for
2784 * identity mappings for rmrr, gfx, and isa, and possibly fall back to
2785 * static identity mapping if iommu_identity_mapping is set.
ba395927 2786 */
19943b0e
DW
2787 if (iommu_identity_mapping) {
2788 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2789 if (ret) {
19943b0e 2790 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2791 goto free_iommu;
ba395927
KA
2792 }
2793 }
ba395927 2794 /*
19943b0e
DW
2795 * For each rmrr
2796 * for each dev attached to rmrr
2797 * do
2798 * locate drhd for dev, alloc domain for dev
2799 * allocate free domain
2800 * allocate page table entries for rmrr
2801 * if context not allocated for bus
2802 * allocate and init context
2803 * set present in root table for this bus
2804 * init context with domain, translation etc
2805 * endfor
2806 * endfor
ba395927 2807 */
19943b0e
DW
2808 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2809 for_each_rmrr_units(rmrr) {
b683b230
JL
2810 /* some BIOS lists non-exist devices in DMAR table. */
2811 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2812 i, dev) {
0b9d9753 2813 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2814 if (ret)
2815 printk(KERN_ERR
2816 "IOMMU: mapping reserved region failed\n");
ba395927 2817 }
4ed0d3e6 2818 }
49a0429e 2819
19943b0e
DW
2820 iommu_prepare_isa();
2821
ba395927
KA
2822 /*
2823 * for each drhd
2824 * enable fault log
2825 * global invalidate context cache
2826 * global invalidate iotlb
2827 * enable translation
2828 */
7c919779 2829 for_each_iommu(iommu, drhd) {
51a63e67
JC
2830 if (drhd->ignored) {
2831 /*
2832 * we always have to disable PMRs or DMA may fail on
2833 * this device
2834 */
2835 if (force_on)
7c919779 2836 iommu_disable_protect_mem_regions(iommu);
ba395927 2837 continue;
51a63e67 2838 }
ba395927
KA
2839
2840 iommu_flush_write_buffer(iommu);
2841
3460a6d9
KA
2842 ret = dmar_set_interrupt(iommu);
2843 if (ret)
989d51fc 2844 goto free_iommu;
3460a6d9 2845
ba395927
KA
2846 iommu_set_root_entry(iommu);
2847
4c25a2c1 2848 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2849 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2850
ba395927
KA
2851 ret = iommu_enable_translation(iommu);
2852 if (ret)
989d51fc 2853 goto free_iommu;
b94996c9
DW
2854
2855 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2856 }
2857
2858 return 0;
989d51fc
JL
2859
2860free_iommu:
7c919779 2861 for_each_active_iommu(iommu, drhd)
a868e6b7 2862 free_dmar_iommu(iommu);
9bdc531e 2863 kfree(deferred_flush);
989d51fc 2864free_g_iommus:
d9630fe9 2865 kfree(g_iommus);
989d51fc 2866error:
ba395927
KA
2867 return ret;
2868}
2869
5a5e02a6 2870/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2871static struct iova *intel_alloc_iova(struct device *dev,
2872 struct dmar_domain *domain,
2873 unsigned long nrpages, uint64_t dma_mask)
ba395927 2874{
ba395927 2875 struct iova *iova = NULL;
ba395927 2876
875764de
DW
2877 /* Restrict dma_mask to the width that the iommu can handle */
2878 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2879
2880 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2881 /*
2882 * First try to allocate an io virtual address in
284901a9 2883 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2884 * from a higher range
ba395927 2885 */
875764de
DW
2886 iova = alloc_iova(&domain->iovad, nrpages,
2887 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2888 if (iova)
2889 return iova;
2890 }
2891 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2892 if (unlikely(!iova)) {
2893 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2894 nrpages, dev_name(dev));
f76aec76
KA
2895 return NULL;
2896 }
2897
2898 return iova;
2899}
2900
d4b709f4 2901static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2902{
2903 struct dmar_domain *domain;
2904 int ret;
2905
d4b709f4 2906 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2907 if (!domain) {
d4b709f4
DW
2908 printk(KERN_ERR "Allocating domain for %s failed",
2909 dev_name(dev));
4fe05bbc 2910 return NULL;
ba395927
KA
2911 }
2912
2913 /* make sure context mapping is ok */
d4b709f4
DW
2914 if (unlikely(!domain_context_mapped(dev))) {
2915 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2916 if (ret) {
d4b709f4
DW
2917 printk(KERN_ERR "Domain context map for %s failed",
2918 dev_name(dev));
4fe05bbc 2919 return NULL;
f76aec76 2920 }
ba395927
KA
2921 }
2922
f76aec76
KA
2923 return domain;
2924}
2925
d4b709f4 2926static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2927{
2928 struct device_domain_info *info;
2929
2930 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2931 info = dev->archdata.iommu;
147202aa
DW
2932 if (likely(info))
2933 return info->domain;
2934
2935 return __get_valid_domain_for_dev(dev);
2936}
2937
3d89194a 2938static int iommu_dummy(struct device *dev)
2c2e2c38 2939{
3d89194a 2940 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2941}
2942
ecb509ec 2943/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 2944static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2945{
2946 int found;
2947
3d89194a 2948 if (iommu_dummy(dev))
1e4c64c4
DW
2949 return 1;
2950
2c2e2c38 2951 if (!iommu_identity_mapping)
1e4c64c4 2952 return 0;
2c2e2c38 2953
9b226624 2954 found = identity_mapping(dev);
2c2e2c38 2955 if (found) {
ecb509ec 2956 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
2957 return 1;
2958 else {
2959 /*
2960 * The 32 bit DMA device is removed from si_domain and falls
2961 * back to non-identity mapping.
2962 */
bf9c9eda 2963 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 2964 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 2965 dev_name(dev));
2c2e2c38
FY
2966 return 0;
2967 }
2968 } else {
2969 /*
2970 * In case a 64 bit DMA device is detached from a VM, the device
2971 * is put into si_domain for identity mapping.
2972 */
ecb509ec 2973 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 2974 int ret;
5913c9bf 2975 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
2976 hw_pass_through ?
2977 CONTEXT_TT_PASS_THROUGH :
2978 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2979 if (!ret) {
2980 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 2981 dev_name(dev));
2c2e2c38
FY
2982 return 1;
2983 }
2984 }
2985 }
2986
1e4c64c4 2987 return 0;
2c2e2c38
FY
2988}
2989
5040a918 2990static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 2991 size_t size, int dir, u64 dma_mask)
f76aec76 2992{
f76aec76 2993 struct dmar_domain *domain;
5b6985ce 2994 phys_addr_t start_paddr;
f76aec76
KA
2995 struct iova *iova;
2996 int prot = 0;
6865f0d1 2997 int ret;
8c11e798 2998 struct intel_iommu *iommu;
33041ec0 2999 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3000
3001 BUG_ON(dir == DMA_NONE);
2c2e2c38 3002
5040a918 3003 if (iommu_no_mapping(dev))
6865f0d1 3004 return paddr;
f76aec76 3005
5040a918 3006 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3007 if (!domain)
3008 return 0;
3009
8c11e798 3010 iommu = domain_get_iommu(domain);
88cb6a74 3011 size = aligned_nrpages(paddr, size);
f76aec76 3012
5040a918 3013 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3014 if (!iova)
3015 goto error;
3016
ba395927
KA
3017 /*
3018 * Check if DMAR supports zero-length reads on write only
3019 * mappings..
3020 */
3021 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3022 !cap_zlr(iommu->cap))
ba395927
KA
3023 prot |= DMA_PTE_READ;
3024 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3025 prot |= DMA_PTE_WRITE;
3026 /*
6865f0d1 3027 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3028 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3029 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3030 * is not a big problem
3031 */
0ab36de2 3032 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3033 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3034 if (ret)
3035 goto error;
3036
1f0ef2aa
DW
3037 /* it's a non-present to present mapping. Only flush if caching mode */
3038 if (cap_caching_mode(iommu->cap))
ea8ea460 3039 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3040 else
8c11e798 3041 iommu_flush_write_buffer(iommu);
f76aec76 3042
03d6a246
DW
3043 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3044 start_paddr += paddr & ~PAGE_MASK;
3045 return start_paddr;
ba395927 3046
ba395927 3047error:
f76aec76
KA
3048 if (iova)
3049 __free_iova(&domain->iovad, iova);
4cf2e75d 3050 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3051 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3052 return 0;
3053}
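/*
 * Illustrative example (hypothetical buffer): mapping paddr
 * 0x12345678 with size 0x100 covers a single VTD page; the returned
 * DMA address is the allocated iova page plus the original in-page
 * offset 0x678, so the device sees the same alignment the CPU does.
 */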
3054
ffbbef5c
FT
3055static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3056 unsigned long offset, size_t size,
3057 enum dma_data_direction dir,
3058 struct dma_attrs *attrs)
bb9e6d65 3059{
ffbbef5c 3060 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3061 dir, *dev->dma_mask);
bb9e6d65
FT
3062}
3063
5e0d2a6f 3064static void flush_unmaps(void)
3065{
80b20dd8 3066 int i, j;
5e0d2a6f 3067
5e0d2a6f 3068 timer_on = 0;
3069
3070 /* just flush them all */
3071 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3072 struct intel_iommu *iommu = g_iommus[i];
3073 if (!iommu)
3074 continue;
c42d9f32 3075
9dd2fe89
YZ
3076 if (!deferred_flush[i].next)
3077 continue;
3078
78d5f0f5
NA
3079 /* In caching mode, global flushes turn emulation expensive */
3080 if (!cap_caching_mode(iommu->cap))
3081 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3082 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3083 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3084 unsigned long mask;
3085 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3086 struct dmar_domain *domain = deferred_flush[i].domain[j];
3087
3088 /* On real hardware multiple invalidations are expensive */
3089 if (cap_caching_mode(iommu->cap))
3090 iommu_flush_iotlb_psi(iommu, domain->id,
ea8ea460
DW
3091 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3092 !deferred_flush[i].freelist[j], 0);
78d5f0f5
NA
3093 else {
3094 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3095 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3096 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3097 }
93a23a72 3098 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3099 if (deferred_flush[i].freelist[j])
3100 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3101 }
9dd2fe89 3102 deferred_flush[i].next = 0;
5e0d2a6f 3103 }
3104
5e0d2a6f 3105 list_size = 0;
5e0d2a6f 3106}
3107
3108static void flush_unmaps_timeout(unsigned long data)
3109{
80b20dd8 3110 unsigned long flags;
3111
3112 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3113 flush_unmaps();
80b20dd8 3114 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3115}
3116
ea8ea460 3117static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3118{
3119 unsigned long flags;
80b20dd8 3120 int next, iommu_id;
8c11e798 3121 struct intel_iommu *iommu;
5e0d2a6f 3122
3123 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3124 if (list_size == HIGH_WATER_MARK)
3125 flush_unmaps();
3126
8c11e798
WH
3127 iommu = domain_get_iommu(dom);
3128 iommu_id = iommu->seq_id;
c42d9f32 3129
80b20dd8 3130 next = deferred_flush[iommu_id].next;
3131 deferred_flush[iommu_id].domain[next] = dom;
3132 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3133 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3134 deferred_flush[iommu_id].next++;
5e0d2a6f 3135
3136 if (!timer_on) {
3137 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3138 timer_on = 1;
3139 }
3140 list_size++;
3141 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3142}
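/*
 * Batching sketch (values taken from the code above): in non-strict
 * mode each unmap is queued per IOMMU and the queue is drained either
 * when list_size reaches HIGH_WATER_MARK or when the 10ms unmap_timer
 * fires, trading a short window of stale IOTLB entries for far fewer
 * invalidation commands.
 */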
3143
ffbbef5c
FT
3144static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3145 size_t size, enum dma_data_direction dir,
3146 struct dma_attrs *attrs)
ba395927 3147{
f76aec76 3148 struct dmar_domain *domain;
d794dc9b 3149 unsigned long start_pfn, last_pfn;
ba395927 3150 struct iova *iova;
8c11e798 3151 struct intel_iommu *iommu;
ea8ea460 3152 struct page *freelist;
ba395927 3153
73676832 3154 if (iommu_no_mapping(dev))
f76aec76 3155 return;
2c2e2c38 3156
1525a29a 3157 domain = find_domain(dev);
ba395927
KA
3158 BUG_ON(!domain);
3159
8c11e798
WH
3160 iommu = domain_get_iommu(domain);
3161
ba395927 3162 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3163 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3164 (unsigned long long)dev_addr))
ba395927 3165 return;
ba395927 3166
d794dc9b
DW
3167 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3168 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3169
d794dc9b 3170 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3171 dev_name(dev), start_pfn, last_pfn);
ba395927 3172
ea8ea460 3173 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3174
5e0d2a6f 3175 if (intel_iommu_strict) {
03d6a246 3176 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3177 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3178 /* free iova */
3179 __free_iova(&domain->iovad, iova);
ea8ea460 3180 dma_free_pagelist(freelist);
5e0d2a6f 3181 } else {
ea8ea460 3182 add_unmap(domain, iova, freelist);
5e0d2a6f 3183 /*
3184 * queue up the release of the unmap to save the 1/6th of the
3185 * cpu used up by the iotlb flush operation...
3186 */
5e0d2a6f 3187 }
ba395927
KA
3188}
3189
5040a918 3190static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3191 dma_addr_t *dma_handle, gfp_t flags,
3192 struct dma_attrs *attrs)
ba395927 3193{
36746436 3194 struct page *page = NULL;
ba395927
KA
3195 int order;
3196
5b6985ce 3197 size = PAGE_ALIGN(size);
ba395927 3198 order = get_order(size);
e8bb910d 3199
5040a918 3200 if (!iommu_no_mapping(dev))
e8bb910d 3201 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3202 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3203 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3204 flags |= GFP_DMA;
3205 else
3206 flags |= GFP_DMA32;
3207 }
ba395927 3208
36746436
AM
3209 if (flags & __GFP_WAIT) {
3210 unsigned int count = size >> PAGE_SHIFT;
3211
3212 page = dma_alloc_from_contiguous(dev, count, order);
3213 if (page && iommu_no_mapping(dev) &&
3214 page_to_phys(page) + size > dev->coherent_dma_mask) {
3215 dma_release_from_contiguous(dev, page, count);
3216 page = NULL;
3217 }
3218 }
3219
3220 if (!page)
3221 page = alloc_pages(flags, order);
3222 if (!page)
ba395927 3223 return NULL;
36746436 3224 memset(page_address(page), 0, size);
ba395927 3225
36746436 3226 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3227 DMA_BIDIRECTIONAL,
5040a918 3228 dev->coherent_dma_mask);
ba395927 3229 if (*dma_handle)
36746436
AM
3230 return page_address(page);
3231 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3232 __free_pages(page, order);
3233
ba395927
KA
3234 return NULL;
3235}
3236
5040a918 3237static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3238 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3239{
3240 int order;
36746436 3241 struct page *page = virt_to_page(vaddr);
ba395927 3242
5b6985ce 3243 size = PAGE_ALIGN(size);
ba395927
KA
3244 order = get_order(size);
3245
5040a918 3246 intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
36746436
AM
3247 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3248 __free_pages(page, order);
ba395927
KA
3249}
3250
5040a918 3251static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3252 int nelems, enum dma_data_direction dir,
3253 struct dma_attrs *attrs)
ba395927 3254{
ba395927 3255 struct dmar_domain *domain;
d794dc9b 3256 unsigned long start_pfn, last_pfn;
f76aec76 3257 struct iova *iova;
8c11e798 3258 struct intel_iommu *iommu;
ea8ea460 3259 struct page *freelist;
ba395927 3260
5040a918 3261 if (iommu_no_mapping(dev))
ba395927
KA
3262 return;
3263
5040a918 3264 domain = find_domain(dev);
8c11e798
WH
3265 BUG_ON(!domain);
3266
3267 iommu = domain_get_iommu(domain);
ba395927 3268
c03ab37c 3269 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3270 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3271 (unsigned long long)sglist[0].dma_address))
f76aec76 3272 return;
f76aec76 3273
d794dc9b
DW
3274 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3275 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76 3276
ea8ea460 3277 freelist = domain_unmap(domain, start_pfn, last_pfn);
f76aec76 3278
acea0018
DW
3279 if (intel_iommu_strict) {
3280 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3281 last_pfn - start_pfn + 1, !freelist, 0);
acea0018
DW
3282 /* free iova */
3283 __free_iova(&domain->iovad, iova);
ea8ea460 3284 dma_free_pagelist(freelist);
acea0018 3285 } else {
ea8ea460 3286 add_unmap(domain, iova, freelist);
acea0018
DW
3287 /*
3288 * queue up the release of the unmap to save the 1/6th of the
3289 * cpu used up by the iotlb flush operation...
3290 */
3291 }
ba395927
KA
3292}
3293
ba395927 3294static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3295 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3296{
3297 int i;
c03ab37c 3298 struct scatterlist *sg;
ba395927 3299
c03ab37c 3300 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3301 BUG_ON(!sg_page(sg));
4cf2e75d 3302 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3303 sg->dma_length = sg->length;
ba395927
KA
3304 }
3305 return nelems;
3306}
3307
5040a918 3308static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3309 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3310{
ba395927 3311 int i;
ba395927 3312 struct dmar_domain *domain;
f76aec76
KA
3313 size_t size = 0;
3314 int prot = 0;
f76aec76
KA
3315 struct iova *iova = NULL;
3316 int ret;
c03ab37c 3317 struct scatterlist *sg;
b536d24d 3318 unsigned long start_vpfn;
8c11e798 3319 struct intel_iommu *iommu;
ba395927
KA
3320
3321 BUG_ON(dir == DMA_NONE);
5040a918
DW
3322 if (iommu_no_mapping(dev))
3323 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3324
5040a918 3325 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3326 if (!domain)
3327 return 0;
3328
8c11e798
WH
3329 iommu = domain_get_iommu(domain);
3330
b536d24d 3331 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3332 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3333
5040a918
DW
3334 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3335 *dev->dma_mask);
f76aec76 3336 if (!iova) {
c03ab37c 3337 sglist->dma_length = 0;
f76aec76
KA
3338 return 0;
3339 }
3340
3341 /*
3342 * Check if DMAR supports zero-length reads on write only
3343 * mappings..
3344 */
3345 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3346 !cap_zlr(iommu->cap))
f76aec76
KA
3347 prot |= DMA_PTE_READ;
3348 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3349 prot |= DMA_PTE_WRITE;
3350
b536d24d 3351 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3352
f532959b 3353 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3354 if (unlikely(ret)) {
3355 /* clear the page */
3356 dma_pte_clear_range(domain, start_vpfn,
3357 start_vpfn + size - 1);
3358 /* free page tables */
3359 dma_pte_free_pagetable(domain, start_vpfn,
3360 start_vpfn + size - 1);
3361 /* free iova */
3362 __free_iova(&domain->iovad, iova);
3363 return 0;
ba395927
KA
3364 }
3365
1f0ef2aa
DW
3366 /* it's a non-present to present mapping. Only flush if caching mode */
3367 if (cap_caching_mode(iommu->cap))
ea8ea460 3368 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3369 else
8c11e798 3370 iommu_flush_write_buffer(iommu);
1f0ef2aa 3371
ba395927
KA
3372 return nelems;
3373}
3374
dfb805e8
FT
3375static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3376{
3377 return !dma_addr;
3378}
3379
160c1d8e 3380struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3381 .alloc = intel_alloc_coherent,
3382 .free = intel_free_coherent,
ba395927
KA
3383 .map_sg = intel_map_sg,
3384 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3385 .map_page = intel_map_page,
3386 .unmap_page = intel_unmap_page,
dfb805e8 3387 .mapping_error = intel_mapping_error,
ba395927
KA
3388};
3389
3390static inline int iommu_domain_cache_init(void)
3391{
3392 int ret = 0;
3393
3394 iommu_domain_cache = kmem_cache_create("iommu_domain",
3395 sizeof(struct dmar_domain),
3396 0,
3397 SLAB_HWCACHE_ALIGN,
3398
3399 NULL);
3400 if (!iommu_domain_cache) {
3401 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3402 ret = -ENOMEM;
3403 }
3404
3405 return ret;
3406}
3407
3408static inline int iommu_devinfo_cache_init(void)
3409{
3410 int ret = 0;
3411
3412 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3413 sizeof(struct device_domain_info),
3414 0,
3415 SLAB_HWCACHE_ALIGN,
ba395927
KA
3416 NULL);
3417 if (!iommu_devinfo_cache) {
3418 printk(KERN_ERR "Couldn't create devinfo cache\n");
3419 ret = -ENOMEM;
3420 }
3421
3422 return ret;
3423}
3424
3425static inline int iommu_iova_cache_init(void)
3426{
3427 int ret = 0;
3428
3429 iommu_iova_cache = kmem_cache_create("iommu_iova",
3430 sizeof(struct iova),
3431 0,
3432 SLAB_HWCACHE_ALIGN,
ba395927
KA
3433 NULL);
3434 if (!iommu_iova_cache) {
3435 printk(KERN_ERR "Couldn't create iova cache\n");
3436 ret = -ENOMEM;
3437 }
3438
3439 return ret;
3440}
3441
3442static int __init iommu_init_mempool(void)
3443{
3444 int ret;
3445 ret = iommu_iova_cache_init();
3446 if (ret)
3447 return ret;
3448
3449 ret = iommu_domain_cache_init();
3450 if (ret)
3451 goto domain_error;
3452
3453 ret = iommu_devinfo_cache_init();
3454 if (!ret)
3455 return ret;
3456
3457 kmem_cache_destroy(iommu_domain_cache);
3458domain_error:
3459 kmem_cache_destroy(iommu_iova_cache);
3460
3461 return -ENOMEM;
3462}
3463
3464static void __init iommu_exit_mempool(void)
3465{
3466 kmem_cache_destroy(iommu_devinfo_cache);
3467 kmem_cache_destroy(iommu_domain_cache);
3468 kmem_cache_destroy(iommu_iova_cache);
3469
3470}
3471
556ab45f
DW
3472static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3473{
3474 struct dmar_drhd_unit *drhd;
3475 u32 vtbar;
3476 int rc;
3477
3478 /* We know that this device on this chipset has its own IOMMU.
3479 * If we find it under a different IOMMU, then the BIOS is lying
3480 * to us. Hope that the IOMMU for this device is actually
3481 * disabled, and it needs no translation...
3482 */
3483 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3484 if (rc) {
3485 /* "can't" happen */
3486 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3487 return;
3488 }
3489 vtbar &= 0xffff0000;
3490
3491 /* we know that this iommu should be at offset 0xa000 from vtbar */
3492 drhd = dmar_find_matched_drhd_unit(pdev);
3493 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3494 TAINT_FIRMWARE_WORKAROUND,
3495 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3496 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3497}
3498DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3499
ba395927
KA
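/*
 * Mark DRHD units that cover no devices at all, or only graphics devices
 * while dmar_map_gfx is clear, as ignored; devices behind an ignored unit
 * get DUMMY_DEVICE_DOMAIN_INFO so they bypass translation.
 */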
3500static void __init init_no_remapping_devices(void)
3501{
3502 struct dmar_drhd_unit *drhd;
832bd858 3503 struct device *dev;
b683b230 3504 int i;
ba395927
KA
3505
3506 for_each_drhd_unit(drhd) {
3507 if (!drhd->include_all) {
b683b230
JL
3508 for_each_active_dev_scope(drhd->devices,
3509 drhd->devices_cnt, i, dev)
3510 break;
832bd858 3511 /* ignore DMAR unit if no devices exist */
ba395927
KA
3512 if (i == drhd->devices_cnt)
3513 drhd->ignored = 1;
3514 }
3515 }
3516
7c919779 3517 for_each_active_drhd_unit(drhd) {
7c919779 3518 if (drhd->include_all)
ba395927
KA
3519 continue;
3520
b683b230
JL
3521 for_each_active_dev_scope(drhd->devices,
3522 drhd->devices_cnt, i, dev)
832bd858 3523 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3524 break;
ba395927
KA
3525 if (i < drhd->devices_cnt)
3526 continue;
3527
c0771df8
DW
3528 /* This IOMMU has *only* gfx devices. Either bypass it or
3529 set the gfx_mapped flag, as appropriate */
3530 if (dmar_map_gfx) {
3531 intel_iommu_gfx_mapped = 1;
3532 } else {
3533 drhd->ignored = 1;
b683b230
JL
3534 for_each_active_dev_scope(drhd->devices,
3535 drhd->devices_cnt, i, dev)
832bd858 3536 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3537 }
3538 }
3539}
3540
f59c7b69
FY
3541#ifdef CONFIG_SUSPEND
3542static int init_iommu_hw(void)
3543{
3544 struct dmar_drhd_unit *drhd;
3545 struct intel_iommu *iommu = NULL;
3546
3547 for_each_active_iommu(iommu, drhd)
3548 if (iommu->qi)
3549 dmar_reenable_qi(iommu);
3550
b779260b
JC
3551 for_each_iommu(iommu, drhd) {
3552 if (drhd->ignored) {
3553 /*
3554 * we always have to disable PMRs or DMA may fail on
3555 * this device
3556 */
3557 if (force_on)
3558 iommu_disable_protect_mem_regions(iommu);
3559 continue;
3560 }
3561
f59c7b69
FY
3562 iommu_flush_write_buffer(iommu);
3563
3564 iommu_set_root_entry(iommu);
3565
3566 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3567 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3568 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3569 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3570 if (iommu_enable_translation(iommu))
3571 return 1;
b94996c9 3572 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3573 }
3574
3575 return 0;
3576}
3577
3578static void iommu_flush_all(void)
3579{
3580 struct dmar_drhd_unit *drhd;
3581 struct intel_iommu *iommu;
3582
3583 for_each_active_iommu(iommu, drhd) {
3584 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3585 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3586 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3587 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3588 }
3589}
3590
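/*
 * Flush all caches, disable translation and save the fault-event registers
 * (FECTL/FEDATA/FEADDR/FEUADDR) of every active IOMMU; iommu_resume()
 * restores them after init_iommu_hw() has re-enabled the units.
 */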
134fac3f 3591static int iommu_suspend(void)
f59c7b69
FY
3592{
3593 struct dmar_drhd_unit *drhd;
3594 struct intel_iommu *iommu = NULL;
3595 unsigned long flag;
3596
3597 for_each_active_iommu(iommu, drhd) {
3598 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3599 GFP_ATOMIC);
3600 if (!iommu->iommu_state)
3601 goto nomem;
3602 }
3603
3604 iommu_flush_all();
3605
3606 for_each_active_iommu(iommu, drhd) {
3607 iommu_disable_translation(iommu);
3608
1f5b3c3f 3609 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3610
3611 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3612 readl(iommu->reg + DMAR_FECTL_REG);
3613 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3614 readl(iommu->reg + DMAR_FEDATA_REG);
3615 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3616 readl(iommu->reg + DMAR_FEADDR_REG);
3617 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3618 readl(iommu->reg + DMAR_FEUADDR_REG);
3619
1f5b3c3f 3620 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3621 }
3622 return 0;
3623
3624nomem:
3625 for_each_active_iommu(iommu, drhd)
3626 kfree(iommu->iommu_state);
3627
3628 return -ENOMEM;
3629}
3630
134fac3f 3631static void iommu_resume(void)
f59c7b69
FY
3632{
3633 struct dmar_drhd_unit *drhd;
3634 struct intel_iommu *iommu = NULL;
3635 unsigned long flag;
3636
3637 if (init_iommu_hw()) {
b779260b
JC
3638 if (force_on)
3639 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3640 else
3641 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3642 return;
f59c7b69
FY
3643 }
3644
3645 for_each_active_iommu(iommu, drhd) {
3646
1f5b3c3f 3647 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3648
3649 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3650 iommu->reg + DMAR_FECTL_REG);
3651 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3652 iommu->reg + DMAR_FEDATA_REG);
3653 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3654 iommu->reg + DMAR_FEADDR_REG);
3655 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3656 iommu->reg + DMAR_FEUADDR_REG);
3657
1f5b3c3f 3658 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3659 }
3660
3661 for_each_active_iommu(iommu, drhd)
3662 kfree(iommu->iommu_state);
f59c7b69
FY
3663}
3664
134fac3f 3665static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3666 .resume = iommu_resume,
3667 .suspend = iommu_suspend,
3668};
3669
134fac3f 3670static void __init init_iommu_pm_ops(void)
f59c7b69 3671{
134fac3f 3672 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3673}
3674
3675#else
99592ba4 3676static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3677#endif /* CONFIG_PM */
3678
318fe7df
SS
3679
3680int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3681{
3682 struct acpi_dmar_reserved_memory *rmrr;
3683 struct dmar_rmrr_unit *rmrru;
3684
3685 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3686 if (!rmrru)
3687 return -ENOMEM;
3688
3689 rmrru->hdr = header;
3690 rmrr = (struct acpi_dmar_reserved_memory *)header;
3691 rmrru->base_address = rmrr->base_address;
3692 rmrru->end_address = rmrr->end_address;
2e455289
JL
3693 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3694 ((void *)rmrr) + rmrr->header.length,
3695 &rmrru->devices_cnt);
3696 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3697 kfree(rmrru);
3698 return -ENOMEM;
3699 }
318fe7df 3700
2e455289 3701 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3702
2e455289 3703 return 0;
318fe7df
SS
3704}
3705
318fe7df
SS
3706int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3707{
3708 struct acpi_dmar_atsr *atsr;
3709 struct dmar_atsr_unit *atsru;
3710
3711 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3712 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3713 if (!atsru)
3714 return -ENOMEM;
3715
3716 atsru->hdr = hdr;
3717 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3718 if (!atsru->include_all) {
3719 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3720 (void *)atsr + atsr->header.length,
3721 &atsru->devices_cnt);
3722 if (atsru->devices_cnt && atsru->devices == NULL) {
3723 kfree(atsru);
3724 return -ENOMEM;
3725 }
3726 }
318fe7df 3727
0e242612 3728 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
3729
3730 return 0;
3731}
3732
9bdc531e
JL
3733static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3734{
3735 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3736 kfree(atsru);
3737}
3738
3739static void intel_iommu_free_dmars(void)
3740{
3741 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3742 struct dmar_atsr_unit *atsru, *atsr_n;
3743
3744 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3745 list_del(&rmrru->list);
3746 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3747 kfree(rmrru);
318fe7df
SS
3748 }
3749
9bdc531e
JL
3750 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3751 list_del(&atsru->list);
3752 intel_iommu_free_atsr(atsru);
3753 }
318fe7df
SS
3754}
3755
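/*
 * Return non-zero if the PCIe root port upstream of @dev is listed in the
 * device scope of an ATSR unit on the device's PCI segment (or if that
 * segment has an include_all ATSR), zero otherwise.
 */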
3756int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3757{
b683b230 3758 int i, ret = 1;
318fe7df 3759 struct pci_bus *bus;
832bd858
DW
3760 struct pci_dev *bridge = NULL;
3761 struct device *tmp;
318fe7df
SS
3762 struct acpi_dmar_atsr *atsr;
3763 struct dmar_atsr_unit *atsru;
3764
3765 dev = pci_physfn(dev);
318fe7df 3766 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3767 bridge = bus->self;
318fe7df 3768 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3769 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3770 return 0;
b5f82ddf 3771 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3772 break;
318fe7df 3773 }
b5f82ddf
JL
3774 if (!bridge)
3775 return 0;
318fe7df 3776
0e242612 3777 rcu_read_lock();
b5f82ddf
JL
3778 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3779 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3780 if (atsr->segment != pci_domain_nr(dev->bus))
3781 continue;
3782
b683b230 3783 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 3784 if (tmp == &bridge->dev)
b683b230 3785 goto out;
b5f82ddf
JL
3786
3787 if (atsru->include_all)
b683b230 3788 goto out;
b5f82ddf 3789 }
b683b230
JL
3790 ret = 0;
3791out:
0e242612 3792 rcu_read_unlock();
318fe7df 3793
b683b230 3794 return ret;
318fe7df
SS
3795}
3796
59ce0515
JL
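/*
 * PCI device hotplug notification: keep the cached RMRR and ATSR device
 * scope lists in sync when devices are added to or removed from the bus.
 */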
3797int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3798{
3799 int ret = 0;
3800 struct dmar_rmrr_unit *rmrru;
3801 struct dmar_atsr_unit *atsru;
3802 struct acpi_dmar_atsr *atsr;
3803 struct acpi_dmar_reserved_memory *rmrr;
3804
3805 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3806 return 0;
3807
3808 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3809 rmrr = container_of(rmrru->hdr,
3810 struct acpi_dmar_reserved_memory, header);
3811 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3812 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3813 ((void *)rmrr) + rmrr->header.length,
3814 rmrr->segment, rmrru->devices,
3815 rmrru->devices_cnt);
27e24950 3816 if (ret < 0)
59ce0515
JL
3817 return ret;
3818 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
3819 dmar_remove_dev_scope(info, rmrr->segment,
3820 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
3821 }
3822 }
3823
3824 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3825 if (atsru->include_all)
3826 continue;
3827
3828 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3829 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3830 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3831 (void *)atsr + atsr->header.length,
3832 atsr->segment, atsru->devices,
3833 atsru->devices_cnt);
3834 if (ret > 0)
3835 break;
 3836 else if (ret < 0)
3837 return ret;
3838 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3839 if (dmar_remove_dev_scope(info, atsr->segment,
3840 atsru->devices, atsru->devices_cnt))
3841 break;
3842 }
3843 }
3844
3845 return 0;
3846}
3847
99dcaded
FY
3848/*
 3849 * Here we only respond to a device being unbound from its driver.
 3850 *
 3851 * A newly added device is not attached to its DMAR domain here yet. That
 3852 * will happen when the device is first mapped to an iova.
3853 */
3854static int device_notifier(struct notifier_block *nb,
3855 unsigned long action, void *data)
3856{
3857 struct device *dev = data;
99dcaded
FY
3858 struct dmar_domain *domain;
3859
3d89194a 3860 if (iommu_dummy(dev))
44cd613c
DW
3861 return 0;
3862
7e7dfab7
JL
3863 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3864 action != BUS_NOTIFY_DEL_DEVICE)
3865 return 0;
3866
1525a29a 3867 domain = find_domain(dev);
99dcaded
FY
3868 if (!domain)
3869 return 0;
3870
3a5670e8 3871 down_read(&dmar_global_lock);
bf9c9eda 3872 domain_remove_one_dev_info(domain, dev);
7e7dfab7
JL
3873 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3874 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3875 list_empty(&domain->devices))
3876 domain_exit(domain);
3a5670e8 3877 up_read(&dmar_global_lock);
a97590e5 3878
99dcaded
FY
3879 return 0;
3880}
3881
3882static struct notifier_block device_nb = {
3883 .notifier_call = device_notifier,
3884};
3885
75f05569
JL
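/*
 * Memory hotplug notifier for the static identity domain: extend the
 * identity map when a memory block goes online, and unmap the range,
 * flush the IOTLBs and release the IOVAs again when it goes offline.
 * Only registered when si_domain is in use without hardware pass-through.
 */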
3886static int intel_iommu_memory_notifier(struct notifier_block *nb,
3887 unsigned long val, void *v)
3888{
3889 struct memory_notify *mhp = v;
3890 unsigned long long start, end;
3891 unsigned long start_vpfn, last_vpfn;
3892
3893 switch (val) {
3894 case MEM_GOING_ONLINE:
3895 start = mhp->start_pfn << PAGE_SHIFT;
3896 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3897 if (iommu_domain_identity_map(si_domain, start, end)) {
3898 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3899 start, end);
3900 return NOTIFY_BAD;
3901 }
3902 break;
3903
3904 case MEM_OFFLINE:
3905 case MEM_CANCEL_ONLINE:
3906 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3907 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3908 while (start_vpfn <= last_vpfn) {
3909 struct iova *iova;
3910 struct dmar_drhd_unit *drhd;
3911 struct intel_iommu *iommu;
ea8ea460 3912 struct page *freelist;
75f05569
JL
3913
3914 iova = find_iova(&si_domain->iovad, start_vpfn);
3915 if (iova == NULL) {
 3916 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3917 start_vpfn);
3918 break;
3919 }
3920
3921 iova = split_and_remove_iova(&si_domain->iovad, iova,
3922 start_vpfn, last_vpfn);
3923 if (iova == NULL) {
3924 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3925 start_vpfn, last_vpfn);
3926 return NOTIFY_BAD;
3927 }
3928
ea8ea460
DW
3929 freelist = domain_unmap(si_domain, iova->pfn_lo,
3930 iova->pfn_hi);
3931
75f05569
JL
3932 rcu_read_lock();
3933 for_each_active_iommu(iommu, drhd)
3934 iommu_flush_iotlb_psi(iommu, si_domain->id,
3935 iova->pfn_lo,
ea8ea460
DW
3936 iova->pfn_hi - iova->pfn_lo + 1,
3937 !freelist, 0);
75f05569 3938 rcu_read_unlock();
ea8ea460 3939 dma_free_pagelist(freelist);
75f05569
JL
3940
3941 start_vpfn = iova->pfn_hi + 1;
3942 free_iova_mem(iova);
3943 }
3944 break;
3945 }
3946
3947 return NOTIFY_OK;
3948}
3949
3950static struct notifier_block intel_iommu_memory_nb = {
3951 .notifier_call = intel_iommu_memory_notifier,
3952 .priority = 0
3953};
3954
ba395927
KA
3955int __init intel_iommu_init(void)
3956{
9bdc531e 3957 int ret = -ENODEV;
3a93c841 3958 struct dmar_drhd_unit *drhd;
7c919779 3959 struct intel_iommu *iommu;
ba395927 3960
a59b50e9
JC
3961 /* VT-d is required for a TXT/tboot launch, so enforce that */
3962 force_on = tboot_force_iommu();
3963
3a5670e8
JL
3964 if (iommu_init_mempool()) {
3965 if (force_on)
3966 panic("tboot: Failed to initialize iommu memory\n");
3967 return -ENOMEM;
3968 }
3969
3970 down_write(&dmar_global_lock);
a59b50e9
JC
3971 if (dmar_table_init()) {
3972 if (force_on)
3973 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 3974 goto out_free_dmar;
a59b50e9 3975 }
ba395927 3976
3a93c841
TI
3977 /*
3978 * Disable translation if already enabled prior to OS handover.
3979 */
7c919779 3980 for_each_active_iommu(iommu, drhd)
3a93c841
TI
3981 if (iommu->gcmd & DMA_GCMD_TE)
3982 iommu_disable_translation(iommu);
3a93c841 3983
c2c7286a 3984 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3985 if (force_on)
3986 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 3987 goto out_free_dmar;
a59b50e9 3988 }
1886e8a9 3989
75f1cdf1 3990 if (no_iommu || dmar_disabled)
9bdc531e 3991 goto out_free_dmar;
2ae21010 3992
318fe7df
SS
3993 if (list_empty(&dmar_rmrr_units))
3994 printk(KERN_INFO "DMAR: No RMRR found\n");
3995
3996 if (list_empty(&dmar_atsr_units))
3997 printk(KERN_INFO "DMAR: No ATSR found\n");
3998
51a63e67
JC
3999 if (dmar_init_reserved_ranges()) {
4000 if (force_on)
4001 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4002 goto out_free_reserved_range;
51a63e67 4003 }
ba395927
KA
4004
4005 init_no_remapping_devices();
4006
b779260b 4007 ret = init_dmars();
ba395927 4008 if (ret) {
a59b50e9
JC
4009 if (force_on)
4010 panic("tboot: Failed to initialize DMARs\n");
ba395927 4011 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 4012 goto out_free_reserved_range;
ba395927 4013 }
3a5670e8 4014 up_write(&dmar_global_lock);
ba395927
KA
4015 printk(KERN_INFO
4016 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4017
5e0d2a6f 4018 init_timer(&unmap_timer);
75f1cdf1
FT
4019#ifdef CONFIG_SWIOTLB
4020 swiotlb = 0;
4021#endif
19943b0e 4022 dma_ops = &intel_dma_ops;
4ed0d3e6 4023
134fac3f 4024 init_iommu_pm_ops();
a8bcbb0d 4025
4236d97d 4026 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4027 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4028 if (si_domain && !hw_pass_through)
4029 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4030
8bc1f85c
ED
4031 intel_iommu_enabled = 1;
4032
ba395927 4033 return 0;
9bdc531e
JL
4034
4035out_free_reserved_range:
4036 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4037out_free_dmar:
4038 intel_iommu_free_dmars();
3a5670e8
JL
4039 up_write(&dmar_global_lock);
4040 iommu_exit_mempool();
9bdc531e 4041 return ret;
ba395927 4042}
e820482c 4043
3199aa6b 4044static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4045 struct device *dev)
3199aa6b 4046{
0bcb3e28 4047 struct pci_dev *tmp, *parent, *pdev;
3199aa6b 4048
0bcb3e28 4049 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4050 return;
4051
0bcb3e28
DW
4052 pdev = to_pci_dev(dev);
4053
3199aa6b
HW
4054 /* dependent device detach */
4055 tmp = pci_find_upstream_pcie_bridge(pdev);
4056 /* Secondary interface's bus number and devfn 0 */
4057 if (tmp) {
4058 parent = pdev->bus->self;
4059 while (parent != tmp) {
4060 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 4061 parent->devfn);
3199aa6b
HW
4062 parent = parent->bus->self;
4063 }
45e829ea 4064 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
4065 iommu_detach_dev(iommu,
4066 tmp->subordinate->number, 0);
4067 else /* this is a legacy PCI bridge */
276dbf99
DW
4068 iommu_detach_dev(iommu, tmp->bus->number,
4069 tmp->devfn);
3199aa6b
HW
4070 }
4071}
4072
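/*
 * Detach one device from @domain: tear down its context entries (and those
 * of any upstream PCIe-to-PCI bridge), and if it was the last device behind
 * its IOMMU, drop that IOMMU from the domain's bitmap; for domains that are
 * neither VM nor static-identity domains also release the domain id there.
 */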
2c2e2c38 4073static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4074 struct device *dev)
c7151a8d 4075{
bca2b916 4076 struct device_domain_info *info, *tmp;
c7151a8d
WH
4077 struct intel_iommu *iommu;
4078 unsigned long flags;
4079 int found = 0;
156baca8 4080 u8 bus, devfn;
c7151a8d 4081
bf9c9eda 4082 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4083 if (!iommu)
4084 return;
4085
4086 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4087 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4088 if (info->iommu == iommu && info->bus == bus &&
4089 info->devfn == devfn) {
109b9b04 4090 unlink_domain_info(info);
c7151a8d
WH
4091 spin_unlock_irqrestore(&device_domain_lock, flags);
4092
93a23a72 4093 iommu_disable_dev_iotlb(info);
c7151a8d 4094 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4095 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4096 free_devinfo_mem(info);
4097
4098 spin_lock_irqsave(&device_domain_lock, flags);
4099
4100 if (found)
4101 break;
4102 else
4103 continue;
4104 }
4105
 4106 /* if there are no other devices under the same iommu
 4107 * owned by this domain, clear this iommu in iommu_bmp and
 4108 * update the iommu count and coherency
4109 */
8bbc4410 4110 if (info->iommu == iommu)
c7151a8d
WH
4111 found = 1;
4112 }
4113
3e7abe25
RD
4114 spin_unlock_irqrestore(&device_domain_lock, flags);
4115
c7151a8d
WH
4116 if (found == 0) {
4117 unsigned long tmp_flags;
4118 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 4119 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 4120 domain->iommu_count--;
58c610bd 4121 domain_update_iommu_cap(domain);
c7151a8d 4122 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 4123
9b4554b2
AW
4124 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
4125 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
4126 spin_lock_irqsave(&iommu->lock, tmp_flags);
4127 clear_bit(domain->id, iommu->domain_ids);
4128 iommu->domains[domain->id] = NULL;
4129 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
4130 }
c7151a8d 4131 }
c7151a8d
WH
4132}
4133
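/*
 * Initialize a domain created through the IOMMU API: set up its iova
 * allocator, derive the agaw from the requested guest address width and
 * allocate the top-level page directory.
 */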
2c2e2c38 4134static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4135{
4136 int adjust_width;
4137
4138 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
4139 domain_reserve_special_ranges(domain);
4140
4141 /* calculate AGAW */
4142 domain->gaw = guest_width;
4143 adjust_width = guestwidth_to_adjustwidth(guest_width);
4144 domain->agaw = width_to_agaw(adjust_width);
4145
5e98c4b1 4146 domain->iommu_coherency = 0;
c5b15255 4147 domain->iommu_snooping = 0;
6dd9a7c7 4148 domain->iommu_superpage = 0;
fe40f1e0 4149 domain->max_addr = 0;
5e98c4b1
WH
4150
4151 /* always allocate the top pgd */
4c923d47 4152 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4153 if (!domain->pgd)
4154 return -ENOMEM;
4155 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4156 return 0;
4157}
4158
5d450806 4159static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 4160{
5d450806 4161 struct dmar_domain *dmar_domain;
38717946 4162
92d03cc8 4163 dmar_domain = alloc_domain(true);
5d450806 4164 if (!dmar_domain) {
38717946 4165 printk(KERN_ERR
5d450806
JR
4166 "intel_iommu_domain_init: dmar_domain == NULL\n");
4167 return -ENOMEM;
38717946 4168 }
2c2e2c38 4169 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 4170 printk(KERN_ERR
5d450806 4171 "intel_iommu_domain_init() failed\n");
92d03cc8 4172 domain_exit(dmar_domain);
5d450806 4173 return -ENOMEM;
38717946 4174 }
8140a95d 4175 domain_update_iommu_cap(dmar_domain);
5d450806 4176 domain->priv = dmar_domain;
faa3d6f5 4177
8a0e715b
JR
4178 domain->geometry.aperture_start = 0;
4179 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4180 domain->geometry.force_aperture = true;
4181
5d450806 4182 return 0;
38717946 4183}
38717946 4184
5d450806 4185static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 4186{
5d450806
JR
4187 struct dmar_domain *dmar_domain = domain->priv;
4188
4189 domain->priv = NULL;
92d03cc8 4190 domain_exit(dmar_domain);
38717946 4191}
38717946 4192
4c5478c9
JR
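/*
 * Attach @dev to an API-created domain: detach it from any previous domain
 * first, check that the IOMMU's address width covers the domain's highest
 * mapped address, strip extra page-table levels if the domain's agaw
 * exceeds the IOMMU's, then install the device/domain mapping.
 */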
4193static int intel_iommu_attach_device(struct iommu_domain *domain,
4194 struct device *dev)
38717946 4195{
4c5478c9 4196 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
4197 struct intel_iommu *iommu;
4198 int addr_width;
156baca8 4199 u8 bus, devfn;
faa3d6f5 4200
7207d8f9
DW
4201 /* normally dev is not mapped */
4202 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4203 struct dmar_domain *old_domain;
4204
1525a29a 4205 old_domain = find_domain(dev);
faa3d6f5 4206 if (old_domain) {
2c2e2c38
FY
4207 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4208 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
bf9c9eda 4209 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4210 else
4211 domain_remove_dev_info(old_domain);
4212 }
4213 }
4214
156baca8 4215 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4216 if (!iommu)
4217 return -ENODEV;
4218
4219 /* check if this iommu agaw is sufficient for max mapped address */
4220 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4221 if (addr_width > cap_mgaw(iommu->cap))
4222 addr_width = cap_mgaw(iommu->cap);
4223
4224 if (dmar_domain->max_addr > (1LL << addr_width)) {
4225 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4226 "sufficient for the mapped address (%llx)\n",
a99c47a2 4227 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4228 return -EFAULT;
4229 }
a99c47a2
TL
4230 dmar_domain->gaw = addr_width;
4231
4232 /*
4233 * Knock out extra levels of page tables if necessary
4234 */
4235 while (iommu->agaw < dmar_domain->agaw) {
4236 struct dma_pte *pte;
4237
4238 pte = dmar_domain->pgd;
4239 if (dma_pte_present(pte)) {
25cbff16
SY
4240 dmar_domain->pgd = (struct dma_pte *)
4241 phys_to_virt(dma_pte_addr(pte));
7a661013 4242 free_pgtable_page(pte);
a99c47a2
TL
4243 }
4244 dmar_domain->agaw--;
4245 }
fe40f1e0 4246
5913c9bf 4247 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4248}
38717946 4249
4c5478c9
JR
4250static void intel_iommu_detach_device(struct iommu_domain *domain,
4251 struct device *dev)
38717946 4252{
4c5478c9 4253 struct dmar_domain *dmar_domain = domain->priv;
4c5478c9 4254
bf9c9eda 4255 domain_remove_one_dev_info(dmar_domain, dev);
faa3d6f5 4256}
c7151a8d 4257
b146a1c9
JR
4258static int intel_iommu_map(struct iommu_domain *domain,
4259 unsigned long iova, phys_addr_t hpa,
5009065d 4260 size_t size, int iommu_prot)
faa3d6f5 4261{
dde57a21 4262 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4263 u64 max_addr;
dde57a21 4264 int prot = 0;
faa3d6f5 4265 int ret;
fe40f1e0 4266
dde57a21
JR
4267 if (iommu_prot & IOMMU_READ)
4268 prot |= DMA_PTE_READ;
4269 if (iommu_prot & IOMMU_WRITE)
4270 prot |= DMA_PTE_WRITE;
9cf06697
SY
4271 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4272 prot |= DMA_PTE_SNP;
dde57a21 4273
163cc52c 4274 max_addr = iova + size;
dde57a21 4275 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4276 u64 end;
4277
4278 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4279 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4280 if (end < max_addr) {
8954da1f 4281 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4282 "sufficient for the mapped address (%llx)\n",
8954da1f 4283 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4284 return -EFAULT;
4285 }
dde57a21 4286 dmar_domain->max_addr = max_addr;
fe40f1e0 4287 }
ad051221
DW
4288 /* Round up size to next multiple of PAGE_SIZE, if it and
4289 the low bits of hpa would take us onto the next page */
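	/* Illustrative example (assumption: 4KiB VT-d page size): hpa == 0x1ffc
	   with size == 0x8 crosses a page boundary, so the rounded-up mapping
	   must cover two pages even though size < VTD_PAGE_SIZE. */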
88cb6a74 4290 size = aligned_nrpages(hpa, size);
ad051221
DW
4291 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4292 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4293 return ret;
38717946 4294}
38717946 4295
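/*
 * Clear the page tables covering [iova, iova + size), issue a page-selective
 * IOTLB flush on every IOMMU the domain is attached to, and only then free
 * the page-table pages collected on the freelist.
 */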
5009065d 4296static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4297 unsigned long iova, size_t size)
38717946 4298{
dde57a21 4299 struct dmar_domain *dmar_domain = domain->priv;
ea8ea460
DW
4300 struct page *freelist = NULL;
4301 struct intel_iommu *iommu;
4302 unsigned long start_pfn, last_pfn;
4303 unsigned int npages;
4304 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4305
4306 /* Cope with horrid API which requires us to unmap more than the
4307 size argument if it happens to be a large-page mapping. */
4308 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4309 BUG();
4310
4311 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4312 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4313
ea8ea460
DW
4314 start_pfn = iova >> VTD_PAGE_SHIFT;
4315 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4316
4317 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4318
4319 npages = last_pfn - start_pfn + 1;
4320
4321 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4322 iommu = g_iommus[iommu_id];
4323
4324 /*
 4325 * find the domain id (bit position in domain_ids) this iommu uses for dmar_domain
4326 */
4327 ndomains = cap_ndoms(iommu->cap);
4328 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4329 if (iommu->domains[num] == dmar_domain)
4330 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4331 npages, !freelist, 0);
4332 }
4333
4334 }
4335
4336 dma_free_pagelist(freelist);
fe40f1e0 4337
163cc52c
DW
4338 if (dmar_domain->max_addr == iova + size)
4339 dmar_domain->max_addr = iova;
b146a1c9 4340
5cf0a76f 4341 return size;
38717946 4342}
38717946 4343
d14d6577 4344static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4345 dma_addr_t iova)
38717946 4346{
d14d6577 4347 struct dmar_domain *dmar_domain = domain->priv;
38717946 4348 struct dma_pte *pte;
5cf0a76f 4349 int level = 0;
faa3d6f5 4350 u64 phys = 0;
38717946 4351
5cf0a76f 4352 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4353 if (pte)
faa3d6f5 4354 phys = dma_pte_addr(pte);
38717946 4355
faa3d6f5 4356 return phys;
38717946 4357}
a8bcbb0d 4358
dbb9fd86
SY
4359static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4360 unsigned long cap)
4361{
4362 struct dmar_domain *dmar_domain = domain->priv;
4363
4364 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4365 return dmar_domain->iommu_snooping;
323f99cb 4366 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4367 return irq_remapping_enabled;
dbb9fd86
SY
4368
4369 return 0;
4370}
4371
783f157b 4372#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
70ae6f0d 4373
abdfdde2
AW
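/*
 * Place the device in an IOMMU group: walk upstream past quirked DMA
 * sources, non-ACS-isolated multifunction siblings and bridges until an
 * ACS-protected path to the root bus is found, then join (or create) the
 * group of the resulting "dma_pdev".
 */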
4374static int intel_iommu_add_device(struct device *dev)
4375{
4376 struct pci_dev *pdev = to_pci_dev(dev);
3da4af0a 4377 struct pci_dev *bridge, *dma_pdev = NULL;
abdfdde2
AW
4378 struct iommu_group *group;
4379 int ret;
156baca8 4380 u8 bus, devfn;
70ae6f0d 4381
156baca8 4382 if (!device_to_iommu(dev, &bus, &devfn))
70ae6f0d
AW
4383 return -ENODEV;
4384
4385 bridge = pci_find_upstream_pcie_bridge(pdev);
4386 if (bridge) {
abdfdde2
AW
4387 if (pci_is_pcie(bridge))
4388 dma_pdev = pci_get_domain_bus_and_slot(
4389 pci_domain_nr(pdev->bus),
4390 bridge->subordinate->number, 0);
3da4af0a 4391 if (!dma_pdev)
abdfdde2
AW
4392 dma_pdev = pci_dev_get(bridge);
4393 } else
4394 dma_pdev = pci_dev_get(pdev);
4395
a4ff1fc2 4396 /* Account for quirked devices */
783f157b
AW
4397 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4398
a4ff1fc2
AW
4399 /*
4400 * If it's a multifunction device that does not support our
c14d2690
AW
4401 * required ACS flags, add to the same group as lowest numbered
4402 * function that also does not suport the required ACS flags.
a4ff1fc2 4403 */
783f157b 4404 if (dma_pdev->multifunction &&
c14d2690
AW
4405 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4406 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4407
4408 for (i = 0; i < 8; i++) {
4409 struct pci_dev *tmp;
4410
4411 tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4412 if (!tmp)
4413 continue;
4414
4415 if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4416 swap_pci_ref(&dma_pdev, tmp);
4417 break;
4418 }
4419 pci_dev_put(tmp);
4420 }
4421 }
783f157b 4422
a4ff1fc2
AW
4423 /*
4424 * Devices on the root bus go through the iommu. If that's not us,
4425 * find the next upstream device and test ACS up to the root bus.
4426 * Finding the next device may require skipping virtual buses.
4427 */
783f157b 4428 while (!pci_is_root_bus(dma_pdev->bus)) {
a4ff1fc2
AW
4429 struct pci_bus *bus = dma_pdev->bus;
4430
4431 while (!bus->self) {
4432 if (!pci_is_root_bus(bus))
4433 bus = bus->parent;
4434 else
4435 goto root_bus;
4436 }
4437
4438 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
783f157b
AW
4439 break;
4440
a4ff1fc2 4441 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
783f157b
AW
4442 }
4443
a4ff1fc2 4444root_bus:
abdfdde2
AW
4445 group = iommu_group_get(&dma_pdev->dev);
4446 pci_dev_put(dma_pdev);
4447 if (!group) {
4448 group = iommu_group_alloc();
4449 if (IS_ERR(group))
4450 return PTR_ERR(group);
70ae6f0d
AW
4451 }
4452
abdfdde2 4453 ret = iommu_group_add_device(group, dev);
bcb71abe 4454
abdfdde2
AW
4455 iommu_group_put(group);
4456 return ret;
4457}
70ae6f0d 4458
abdfdde2
AW
4459static void intel_iommu_remove_device(struct device *dev)
4460{
4461 iommu_group_remove_device(dev);
70ae6f0d
AW
4462}
4463
a8bcbb0d
JR
4464static struct iommu_ops intel_iommu_ops = {
4465 .domain_init = intel_iommu_domain_init,
4466 .domain_destroy = intel_iommu_domain_destroy,
4467 .attach_dev = intel_iommu_attach_device,
4468 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4469 .map = intel_iommu_map,
4470 .unmap = intel_iommu_unmap,
a8bcbb0d 4471 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4472 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4473 .add_device = intel_iommu_add_device,
4474 .remove_device = intel_iommu_remove_device,
6d1c56a9 4475 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4476};
9af88143 4477
9452618e
DV
4478static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4479{
4480 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4481 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4482 dmar_map_gfx = 0;
4483}
4484
4485DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4486DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4487DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4488DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4489DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4490DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4491DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4492
d34d6517 4493static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4494{
4495 /*
4496 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4497 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4498 */
4499 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4500 rwbf_quirk = 1;
4501}
4502
4503DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4505DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4506DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4507DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4508DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4509DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4510
eecfd57f
AJ
4511#define GGC 0x52
4512#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4513#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4514#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4515#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4516#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4517#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4518#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4519#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4520
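/*
 * Calpella/Ironlake: if the BIOS allocated no stolen memory for a shadow
 * GTT, graphics cannot be translated and must bypass the IOMMU; otherwise
 * batched IOTLB flushing is disabled (intel_iommu_strict) for these chips.
 */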
d34d6517 4521static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4522{
4523 unsigned short ggc;
4524
eecfd57f 4525 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4526 return;
4527
eecfd57f 4528 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4529 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4530 dmar_map_gfx = 0;
6fbcfb3e
DW
4531 } else if (dmar_map_gfx) {
4532 /* we have to ensure the gfx device is idle before we flush */
4533 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4534 intel_iommu_strict = 1;
4535 }
9eecabcb
DW
4536}
4537DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4538DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4539DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4540DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4541
e0fc7e0b
DW
4542/* On Tylersburg chipsets, some BIOSes have been known to enable the
4543 ISOCH DMAR unit for the Azalia sound device, but not give it any
4544 TLB entries, which causes it to deadlock. Check for that. We do
4545 this in a function called from init_dmars(), instead of in a PCI
4546 quirk, because we don't want to print the obnoxious "BIOS broken"
4547 message if VT-d is actually disabled.
4548*/
4549static void __init check_tylersburg_isoch(void)
4550{
4551 struct pci_dev *pdev;
4552 uint32_t vtisochctrl;
4553
4554 /* If there's no Azalia in the system anyway, forget it. */
4555 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4556 if (!pdev)
4557 return;
4558 pci_dev_put(pdev);
4559
4560 /* System Management Registers. Might be hidden, in which case
4561 we can't do the sanity check. But that's OK, because the
4562 known-broken BIOSes _don't_ actually hide it, so far. */
4563 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4564 if (!pdev)
4565 return;
4566
4567 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4568 pci_dev_put(pdev);
4569 return;
4570 }
4571
4572 pci_dev_put(pdev);
4573
4574 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4575 if (vtisochctrl & 1)
4576 return;
4577
4578 /* Drop all bits other than the number of TLB entries */
4579 vtisochctrl &= 0x1c;
4580
4581 /* If we have the recommended number of TLB entries (16), fine. */
4582 if (vtisochctrl == 0x10)
4583 return;
4584
4585 /* Zero TLB entries? You get to ride the short bus to school. */
4586 if (!vtisochctrl) {
4587 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4588 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4589 dmi_get_system_info(DMI_BIOS_VENDOR),
4590 dmi_get_system_info(DMI_BIOS_VERSION),
4591 dmi_get_system_info(DMI_PRODUCT_VERSION));
4592 iommu_identity_mapping |= IDENTMAP_AZALIA;
4593 return;
4594 }
4595
4596 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4597 vtisochctrl);
4598}