/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

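/*
 * An AGAW (adjusted guest address width) counts page-table levels above the
 * 30-bit, 2-level base case: width = 30 + 9 * agaw (capped at 64) and the
 * table has agaw + 2 levels, so agaw 1/2/3 give 39/48/57-bit widths with
 * 3/4/5-level tables.
 */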
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
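/*
 * A root table is one 4KiB page of these 16-byte entries, i.e. 256 of
 * them: one per PCI bus.  Each present entry points to a context table
 * that is indexed by devfn.
 */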
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val &= ~VTD_PAGE_MASK;
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}

/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: avail
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & DMA_PTE_LARGE_PAGE);
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
					/* bitmap of iommus this domain uses*/

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature*/
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};

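/*
 * A domain may be mapped on several IOMMUs: iommu_bmp records which ones,
 * iommu_count how many, and the capability fields above are kept as the
 * lowest common denominator of all attached units (domain_update_iommu_cap).
 */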
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct device *dev;	/* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

static void flush_unmaps_timeout(unsigned long data);

static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);

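/*
 * Deferred ("lazy") unmap state: instead of flushing the IOTLB on every
 * unmap, freed IOVAs are batched in deferred_flush[] (at most
 * HIGH_WATER_MARK per table) and released from flush_unmaps_timeout(),
 * unless intel_iommu_strict forces a flush on every unmap.
 */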
#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
	struct page *freelist[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

92d03cc8 417static void domain_exit(struct dmar_domain *domain);
ba395927 418static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 419static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 420 struct device *dev);
92d03cc8 421static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 422 struct device *dev);
2a46ddf7
JL
423static int domain_detach_iommu(struct dmar_domain *domain,
424 struct intel_iommu *iommu);
ba395927 425
d3f13810 426#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
427int dmar_disabled = 0;
428#else
429int dmar_disabled = 1;
d3f13810 430#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 431
8bc1f85c
ED
432int intel_iommu_enabled = 0;
433EXPORT_SYMBOL_GPL(intel_iommu_enabled);
434
2d9e667e 435static int dmar_map_gfx = 1;
7d3b03ce 436static int dmar_forcedac;
5e0d2a6f 437static int intel_iommu_strict;
6dd9a7c7 438static int intel_iommu_superpage = 1;
ba395927 439
c0771df8
DW
440int intel_iommu_gfx_mapped;
441EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
442
ba395927
KA
443#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
444static DEFINE_SPINLOCK(device_domain_lock);
445static LIST_HEAD(device_domain_list);
446
b22f6434 447static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 448
ba395927
KA
449static int __init intel_iommu_setup(char *str)
450{
451 if (!str)
452 return -EINVAL;
453 while (*str) {
0cd5c3c8
KM
454 if (!strncmp(str, "on", 2)) {
455 dmar_disabled = 0;
456 printk(KERN_INFO "Intel-IOMMU: enabled\n");
457 } else if (!strncmp(str, "off", 3)) {
ba395927 458 dmar_disabled = 1;
0cd5c3c8 459 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
460 } else if (!strncmp(str, "igfx_off", 8)) {
461 dmar_map_gfx = 0;
462 printk(KERN_INFO
463 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 464 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 465 printk(KERN_INFO
7d3b03ce
KA
466 "Intel-IOMMU: Forcing DAC for PCI devices\n");
467 dmar_forcedac = 1;
5e0d2a6f 468 } else if (!strncmp(str, "strict", 6)) {
469 printk(KERN_INFO
470 "Intel-IOMMU: disable batched IOTLB flush\n");
471 intel_iommu_strict = 1;
6dd9a7c7
YS
472 } else if (!strncmp(str, "sp_off", 6)) {
473 printk(KERN_INFO
474 "Intel-IOMMU: disable supported super page\n");
475 intel_iommu_superpage = 0;
ba395927
KA
476 }
477
478 str += strcspn(str, ",");
479 while (*str == ',')
480 str++;
481 }
482 return 0;
483}
484__setup("intel_iommu=", intel_iommu_setup);
485
486static struct kmem_cache *iommu_domain_cache;
487static struct kmem_cache *iommu_devinfo_cache;
488static struct kmem_cache *iommu_iova_cache;
489
4c923d47 490static inline void *alloc_pgtable_page(int node)
eb3fa7cb 491{
4c923d47
SS
492 struct page *page;
493 void *vaddr = NULL;
eb3fa7cb 494
4c923d47
SS
495 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
496 if (page)
497 vaddr = page_address(page);
eb3fa7cb 498 return vaddr;
ba395927
KA
499}
500
501static inline void free_pgtable_page(void *vaddr)
502{
503 free_page((unsigned long)vaddr);
504}
505
506static inline void *alloc_domain_mem(void)
507{
354bb65e 508 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
509}
510
38717946 511static void free_domain_mem(void *vaddr)
ba395927
KA
512{
513 kmem_cache_free(iommu_domain_cache, vaddr);
514}
515
516static inline void * alloc_devinfo_mem(void)
517{
354bb65e 518 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
519}
520
521static inline void free_devinfo_mem(void *vaddr)
522{
523 kmem_cache_free(iommu_devinfo_cache, vaddr);
524}
525
526struct iova *alloc_iova_mem(void)
527{
354bb65e 528 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
529}
530
531void free_iova_mem(struct iova *iova)
532{
533 kmem_cache_free(iommu_iova_cache, iova);
534}
535
ab8dfe25
JL
536static inline int domain_type_is_vm(struct dmar_domain *domain)
537{
538 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
539}
540
541static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
542{
543 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
544 DOMAIN_FLAG_STATIC_IDENTITY);
545}
1b573683 546
162d1b10
JL
547static inline int domain_pfn_supported(struct dmar_domain *domain,
548 unsigned long pfn)
549{
550 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
551
552 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
553}
554
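/*
 * Pick the highest AGAW that fits under @max_gaw and is advertised in the
 * IOMMU's SAGAW capability field; returns -1 if none is supported.
 */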
4ed0d3e6 555static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
556{
557 unsigned long sagaw;
558 int agaw = -1;
559
560 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 561 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
562 agaw >= 0; agaw--) {
563 if (test_bit(agaw, &sagaw))
564 break;
565 }
566
567 return agaw;
568}
569
4ed0d3e6
FY
570/*
571 * Calculate max SAGAW for each iommu.
572 */
573int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
574{
575 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
576}
577
/*
 * Calculate the agaw for each iommu.
 * "SAGAW" may be different across iommus: use the default agaw, and fall
 * back to a smaller supported agaw for iommus that don't support it.
 */
583int iommu_calculate_agaw(struct intel_iommu *iommu)
584{
585 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
586}
587
/* This function only returns a single iommu in a domain */
8c11e798
WH
589static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
590{
591 int iommu_id;
592
2c2e2c38 593 /* si_domain and vm domain should not get here. */
ab8dfe25 594 BUG_ON(domain_type_is_vm_or_si(domain));
1b198bb0 595 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
596 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
597 return NULL;
598
599 return g_iommus[iommu_id];
600}
601
8e604097
WH
602static void domain_update_iommu_coherency(struct dmar_domain *domain)
603{
d0501960
DW
604 struct dmar_drhd_unit *drhd;
605 struct intel_iommu *iommu;
606 int i, found = 0;
2e12bc29 607
d0501960 608 domain->iommu_coherency = 1;
8e604097 609
1b198bb0 610 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 611 found = 1;
8e604097
WH
612 if (!ecap_coherent(g_iommus[i]->ecap)) {
613 domain->iommu_coherency = 0;
614 break;
615 }
8e604097 616 }
d0501960
DW
617 if (found)
618 return;
619
620 /* No hardware attached; use lowest common denominator */
621 rcu_read_lock();
622 for_each_active_iommu(iommu, drhd) {
623 if (!ecap_coherent(iommu->ecap)) {
624 domain->iommu_coherency = 0;
625 break;
626 }
627 }
628 rcu_read_unlock();
8e604097
WH
629}
630
161f6934 631static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 632{
161f6934
JL
633 struct dmar_drhd_unit *drhd;
634 struct intel_iommu *iommu;
635 int ret = 1;
58c610bd 636
161f6934
JL
637 rcu_read_lock();
638 for_each_active_iommu(iommu, drhd) {
639 if (iommu != skip) {
640 if (!ecap_sc_support(iommu->ecap)) {
641 ret = 0;
642 break;
643 }
58c610bd 644 }
58c610bd 645 }
161f6934
JL
646 rcu_read_unlock();
647
648 return ret;
58c610bd
SY
649}
650
161f6934 651static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 652{
8140a95d 653 struct dmar_drhd_unit *drhd;
161f6934 654 struct intel_iommu *iommu;
8140a95d 655 int mask = 0xf;
6dd9a7c7
YS
656
657 if (!intel_iommu_superpage) {
161f6934 658 return 0;
6dd9a7c7
YS
659 }
660
8140a95d 661 /* set iommu_superpage to the smallest common denominator */
0e242612 662 rcu_read_lock();
8140a95d 663 for_each_active_iommu(iommu, drhd) {
161f6934
JL
664 if (iommu != skip) {
665 mask &= cap_super_page_val(iommu->cap);
666 if (!mask)
667 break;
6dd9a7c7
YS
668 }
669 }
0e242612
JL
670 rcu_read_unlock();
671
161f6934 672 return fls(mask);
6dd9a7c7
YS
673}
674
58c610bd
SY
675/* Some capabilities may be different across iommus */
676static void domain_update_iommu_cap(struct dmar_domain *domain)
677{
678 domain_update_iommu_coherency(domain);
161f6934
JL
679 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
680 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
681}
682
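/*
 * Find the DRHD unit (IOMMU) responsible for @dev: either the device (or a
 * PCI bridge above it) is listed in the unit's device scope, or the unit is
 * an include-all one for the device's PCI segment.  The device's bus and
 * devfn numbers are returned through @bus and @devfn.
 */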
156baca8 683static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
684{
685 struct dmar_drhd_unit *drhd = NULL;
b683b230 686 struct intel_iommu *iommu;
156baca8
DW
687 struct device *tmp;
688 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 689 u16 segment = 0;
c7151a8d
WH
690 int i;
691
156baca8
DW
692 if (dev_is_pci(dev)) {
693 pdev = to_pci_dev(dev);
694 segment = pci_domain_nr(pdev->bus);
695 } else if (ACPI_COMPANION(dev))
696 dev = &ACPI_COMPANION(dev)->dev;
697
0e242612 698 rcu_read_lock();
b683b230 699 for_each_active_iommu(iommu, drhd) {
156baca8 700 if (pdev && segment != drhd->segment)
276dbf99 701 continue;
c7151a8d 702
b683b230 703 for_each_active_dev_scope(drhd->devices,
156baca8
DW
704 drhd->devices_cnt, i, tmp) {
705 if (tmp == dev) {
706 *bus = drhd->devices[i].bus;
707 *devfn = drhd->devices[i].devfn;
b683b230 708 goto out;
156baca8
DW
709 }
710
711 if (!pdev || !dev_is_pci(tmp))
712 continue;
713
714 ptmp = to_pci_dev(tmp);
715 if (ptmp->subordinate &&
716 ptmp->subordinate->number <= pdev->bus->number &&
717 ptmp->subordinate->busn_res.end >= pdev->bus->number)
718 goto got_pdev;
924b6231 719 }
c7151a8d 720
156baca8
DW
721 if (pdev && drhd->include_all) {
722 got_pdev:
723 *bus = pdev->bus->number;
724 *devfn = pdev->devfn;
b683b230 725 goto out;
156baca8 726 }
c7151a8d 727 }
b683b230 728 iommu = NULL;
156baca8 729 out:
0e242612 730 rcu_read_unlock();
c7151a8d 731
b683b230 732 return iommu;
c7151a8d
WH
733}
734
5331fe6f
WH
735static void domain_flush_cache(struct dmar_domain *domain,
736 void *addr, int size)
737{
738 if (!domain->iommu_coherency)
739 clflush_cache_range(addr, size);
740}
741
ba395927
KA
742/* Gets context entry for a given bus and devfn */
743static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
744 u8 bus, u8 devfn)
745{
746 struct root_entry *root;
747 struct context_entry *context;
748 unsigned long phy_addr;
749 unsigned long flags;
750
751 spin_lock_irqsave(&iommu->lock, flags);
752 root = &iommu->root_entry[bus];
753 context = get_context_addr_from_root(root);
754 if (!context) {
4c923d47
SS
755 context = (struct context_entry *)
756 alloc_pgtable_page(iommu->node);
ba395927
KA
757 if (!context) {
758 spin_unlock_irqrestore(&iommu->lock, flags);
759 return NULL;
760 }
5b6985ce 761 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
762 phy_addr = virt_to_phys((void *)context);
763 set_root_value(root, phy_addr);
764 set_root_present(root);
765 __iommu_flush_cache(iommu, root, sizeof(*root));
766 }
767 spin_unlock_irqrestore(&iommu->lock, flags);
768 return &context[devfn];
769}
770
771static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
772{
773 struct root_entry *root;
774 struct context_entry *context;
775 int ret;
776 unsigned long flags;
777
778 spin_lock_irqsave(&iommu->lock, flags);
779 root = &iommu->root_entry[bus];
780 context = get_context_addr_from_root(root);
781 if (!context) {
782 ret = 0;
783 goto out;
784 }
c07e7d21 785 ret = context_present(&context[devfn]);
ba395927
KA
786out:
787 spin_unlock_irqrestore(&iommu->lock, flags);
788 return ret;
789}
790
791static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
792{
793 struct root_entry *root;
794 struct context_entry *context;
795 unsigned long flags;
796
797 spin_lock_irqsave(&iommu->lock, flags);
798 root = &iommu->root_entry[bus];
799 context = get_context_addr_from_root(root);
800 if (context) {
c07e7d21 801 context_clear_entry(&context[devfn]);
ba395927
KA
802 __iommu_flush_cache(iommu, &context[devfn], \
803 sizeof(*context));
804 }
805 spin_unlock_irqrestore(&iommu->lock, flags);
806}
807
808static void free_context_table(struct intel_iommu *iommu)
809{
810 struct root_entry *root;
811 int i;
812 unsigned long flags;
813 struct context_entry *context;
814
815 spin_lock_irqsave(&iommu->lock, flags);
816 if (!iommu->root_entry) {
817 goto out;
818 }
819 for (i = 0; i < ROOT_ENTRY_NR; i++) {
820 root = &iommu->root_entry[i];
821 context = get_context_addr_from_root(root);
822 if (context)
823 free_pgtable_page(context);
824 }
825 free_pgtable_page(iommu->root_entry);
826 iommu->root_entry = NULL;
827out:
828 spin_unlock_irqrestore(&iommu->lock, flags);
829}
830
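/*
 * Walk (and allocate, where needed) the page-table path for @pfn.  On entry
 * *target_level is the level we want a PTE for (0 means "stop wherever the
 * walk ends", e.g. at an existing superpage); on return it holds the level
 * that was actually reached.
 */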
b026fd28 831static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 832 unsigned long pfn, int *target_level)
ba395927 833{
ba395927
KA
834 struct dma_pte *parent, *pte = NULL;
835 int level = agaw_to_level(domain->agaw);
4399c8bf 836 int offset;
ba395927
KA
837
838 BUG_ON(!domain->pgd);
f9423606 839
162d1b10 840 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
841 /* Address beyond IOMMU's addressing capabilities. */
842 return NULL;
843
ba395927
KA
844 parent = domain->pgd;
845
5cf0a76f 846 while (1) {
ba395927
KA
847 void *tmp_page;
848
b026fd28 849 offset = pfn_level_offset(pfn, level);
ba395927 850 pte = &parent[offset];
5cf0a76f 851 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 852 break;
5cf0a76f 853 if (level == *target_level)
ba395927
KA
854 break;
855
19c239ce 856 if (!dma_pte_present(pte)) {
c85994e4
DW
857 uint64_t pteval;
858
4c923d47 859 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 860
206a73c1 861 if (!tmp_page)
ba395927 862 return NULL;
206a73c1 863
c85994e4 864 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 865 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 866 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
867 /* Someone else set it while we were thinking; use theirs. */
868 free_pgtable_page(tmp_page);
effad4b5 869 else
c85994e4 870 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 871 }
5cf0a76f
DW
872 if (level == 1)
873 break;
874
19c239ce 875 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
876 level--;
877 }
878
5cf0a76f
DW
879 if (!*target_level)
880 *target_level = level;
881
ba395927
KA
882 return pte;
883}
884
6dd9a7c7 885
ba395927 886/* return address's pte at specific level */
90dcfb5e
DW
887static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
888 unsigned long pfn,
6dd9a7c7 889 int level, int *large_page)
ba395927
KA
890{
891 struct dma_pte *parent, *pte = NULL;
892 int total = agaw_to_level(domain->agaw);
893 int offset;
894
895 parent = domain->pgd;
896 while (level <= total) {
90dcfb5e 897 offset = pfn_level_offset(pfn, total);
ba395927
KA
898 pte = &parent[offset];
899 if (level == total)
900 return pte;
901
6dd9a7c7
YS
902 if (!dma_pte_present(pte)) {
903 *large_page = total;
ba395927 904 break;
6dd9a7c7
YS
905 }
906
e16922af 907 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
908 *large_page = total;
909 return pte;
910 }
911
19c239ce 912 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
913 total--;
914 }
915 return NULL;
916}
917
ba395927 918/* clear last level pte, a tlb flush should be followed */
5cf0a76f 919static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
920 unsigned long start_pfn,
921 unsigned long last_pfn)
ba395927 922{
6dd9a7c7 923 unsigned int large_page = 1;
310a5ab9 924 struct dma_pte *first_pte, *pte;
66eae846 925
162d1b10
JL
926 BUG_ON(!domain_pfn_supported(domain, start_pfn));
927 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 928 BUG_ON(start_pfn > last_pfn);
ba395927 929
04b18e65 930 /* we don't need lock here; nobody else touches the iova range */
59c36286 931 do {
6dd9a7c7
YS
932 large_page = 1;
933 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 934 if (!pte) {
6dd9a7c7 935 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
936 continue;
937 }
6dd9a7c7 938 do {
310a5ab9 939 dma_clear_pte(pte);
6dd9a7c7 940 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 941 pte++;
75e6bf96
DW
942 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
943
310a5ab9
DW
944 domain_flush_cache(domain, first_pte,
945 (void *)pte - (void *)first_pte);
59c36286
DW
946
947 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
948}
949
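/*
 * Recursively free page-table pages whose range lies entirely inside
 * [start_pfn, last_pfn].  Partially covered tables are kept; their PTEs
 * have already been cleared by dma_pte_clear_range().
 */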
3269ee0b
AW
950static void dma_pte_free_level(struct dmar_domain *domain, int level,
951 struct dma_pte *pte, unsigned long pfn,
952 unsigned long start_pfn, unsigned long last_pfn)
953{
954 pfn = max(start_pfn, pfn);
955 pte = &pte[pfn_level_offset(pfn, level)];
956
957 do {
958 unsigned long level_pfn;
959 struct dma_pte *level_pte;
960
961 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
962 goto next;
963
964 level_pfn = pfn & level_mask(level - 1);
965 level_pte = phys_to_virt(dma_pte_addr(pte));
966
967 if (level > 2)
968 dma_pte_free_level(domain, level - 1, level_pte,
969 level_pfn, start_pfn, last_pfn);
970
971 /* If range covers entire pagetable, free it */
972 if (!(start_pfn > level_pfn ||
08336fd2 973 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
974 dma_clear_pte(pte);
975 domain_flush_cache(domain, pte, sizeof(*pte));
976 free_pgtable_page(level_pte);
977 }
978next:
979 pfn += level_size(level);
980 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
981}
982
ba395927
KA
983/* free page table pages. last level pte should already be cleared */
984static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
985 unsigned long start_pfn,
986 unsigned long last_pfn)
ba395927 987{
162d1b10
JL
988 BUG_ON(!domain_pfn_supported(domain, start_pfn));
989 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 990 BUG_ON(start_pfn > last_pfn);
ba395927 991
d41a4adb
JL
992 dma_pte_clear_range(domain, start_pfn, last_pfn);
993
f3a0a52f 994 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
995 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
996 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 997
ba395927 998 /* free pgd */
d794dc9b 999 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1000 free_pgtable_page(domain->pgd);
1001 domain->pgd = NULL;
1002 }
1003}
1004
ea8ea460
DW
1005/* When a page at a given level is being unlinked from its parent, we don't
1006 need to *modify* it at all. All we need to do is make a list of all the
1007 pages which can be freed just as soon as we've flushed the IOTLB and we
1008 know the hardware page-walk will no longer touch them.
1009 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1010 be freed. */
1011static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1012 int level, struct dma_pte *pte,
1013 struct page *freelist)
1014{
1015 struct page *pg;
1016
1017 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1018 pg->freelist = freelist;
1019 freelist = pg;
1020
1021 if (level == 1)
1022 return freelist;
1023
adeb2590
JL
1024 pte = page_address(pg);
1025 do {
ea8ea460
DW
1026 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1027 freelist = dma_pte_list_pagetables(domain, level - 1,
1028 pte, freelist);
adeb2590
JL
1029 pte++;
1030 } while (!first_pte_in_page(pte));
ea8ea460
DW
1031
1032 return freelist;
1033}
1034
1035static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1036 struct dma_pte *pte, unsigned long pfn,
1037 unsigned long start_pfn,
1038 unsigned long last_pfn,
1039 struct page *freelist)
1040{
1041 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1042
1043 pfn = max(start_pfn, pfn);
1044 pte = &pte[pfn_level_offset(pfn, level)];
1045
1046 do {
1047 unsigned long level_pfn;
1048
1049 if (!dma_pte_present(pte))
1050 goto next;
1051
1052 level_pfn = pfn & level_mask(level);
1053
1054 /* If range covers entire pagetable, free it */
1055 if (start_pfn <= level_pfn &&
1056 last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
1059 if (level > 1 && !dma_pte_superpage(pte))
1060 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1061
1062 dma_clear_pte(pte);
1063 if (!first_pte)
1064 first_pte = pte;
1065 last_pte = pte;
1066 } else if (level > 1) {
1067 /* Recurse down into a level that isn't *entirely* obsolete */
1068 freelist = dma_pte_clear_level(domain, level - 1,
1069 phys_to_virt(dma_pte_addr(pte)),
1070 level_pfn, start_pfn, last_pfn,
1071 freelist);
1072 }
1073next:
1074 pfn += level_size(level);
1075 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1076
1077 if (first_pte)
1078 domain_flush_cache(domain, first_pte,
1079 (void *)++last_pte - (void *)first_pte);
1080
1081 return freelist;
1082}
1083
1084/* We can't just free the pages because the IOMMU may still be walking
1085 the page tables, and may have cached the intermediate levels. The
1086 pages can only be freed after the IOTLB flush has been done. */
1087struct page *domain_unmap(struct dmar_domain *domain,
1088 unsigned long start_pfn,
1089 unsigned long last_pfn)
1090{
ea8ea460
DW
1091 struct page *freelist = NULL;
1092
162d1b10
JL
1093 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1094 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1095 BUG_ON(start_pfn > last_pfn);
1096
1097 /* we don't need lock here; nobody else touches the iova range */
1098 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1099 domain->pgd, 0, start_pfn, last_pfn, NULL);
1100
1101 /* free pgd */
1102 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1103 struct page *pgd_page = virt_to_page(domain->pgd);
1104 pgd_page->freelist = freelist;
1105 freelist = pgd_page;
1106
1107 domain->pgd = NULL;
1108 }
1109
1110 return freelist;
1111}
1112
1113void dma_free_pagelist(struct page *freelist)
1114{
1115 struct page *pg;
1116
1117 while ((pg = freelist)) {
1118 freelist = pg->freelist;
1119 free_pgtable_page(page_address(pg));
1120 }
1121}
1122
ba395927
KA
1123/* iommu handling */
1124static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1125{
1126 struct root_entry *root;
1127 unsigned long flags;
1128
4c923d47 1129 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46
JL
1130 if (!root) {
1131 pr_err("IOMMU: allocating root entry for %s failed\n",
1132 iommu->name);
ba395927 1133 return -ENOMEM;
ffebeb46 1134 }
ba395927 1135
5b6985ce 1136 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1137
1138 spin_lock_irqsave(&iommu->lock, flags);
1139 iommu->root_entry = root;
1140 spin_unlock_irqrestore(&iommu->lock, flags);
1141
1142 return 0;
1143}
1144
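/*
 * Program the root table pointer: write its physical address to
 * DMAR_RTADDR_REG and issue a "set root table pointer" command, waiting
 * for the hardware to acknowledge it in the global status register.
 */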
ba395927
KA
1145static void iommu_set_root_entry(struct intel_iommu *iommu)
1146{
1147 void *addr;
c416daa9 1148 u32 sts;
ba395927
KA
1149 unsigned long flag;
1150
1151 addr = iommu->root_entry;
1152
1f5b3c3f 1153 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1154 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1155
c416daa9 1156 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1157
1158 /* Make sure hardware complete it */
1159 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1160 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1161
1f5b3c3f 1162 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1163}
1164
1165static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1166{
1167 u32 val;
1168 unsigned long flag;
1169
9af88143 1170 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1171 return;
ba395927 1172
1f5b3c3f 1173 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1174 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1175
1176 /* Make sure hardware complete it */
1177 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1178 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1179
1f5b3c3f 1180 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1181}
1182
/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1184static void __iommu_flush_context(struct intel_iommu *iommu,
1185 u16 did, u16 source_id, u8 function_mask,
1186 u64 type)
ba395927
KA
1187{
1188 u64 val = 0;
1189 unsigned long flag;
1190
ba395927
KA
1191 switch (type) {
1192 case DMA_CCMD_GLOBAL_INVL:
1193 val = DMA_CCMD_GLOBAL_INVL;
1194 break;
1195 case DMA_CCMD_DOMAIN_INVL:
1196 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1197 break;
1198 case DMA_CCMD_DEVICE_INVL:
1199 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1200 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1201 break;
1202 default:
1203 BUG();
1204 }
1205 val |= DMA_CCMD_ICC;
1206
1f5b3c3f 1207 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1208 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1209
1210 /* Make sure hardware complete it */
1211 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1212 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1213
1f5b3c3f 1214 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1215}
1216
/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1218static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1219 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1220{
1221 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1222 u64 val = 0, val_iva = 0;
1223 unsigned long flag;
1224
ba395927
KA
1225 switch (type) {
1226 case DMA_TLB_GLOBAL_FLUSH:
1227 /* global flush doesn't need set IVA_REG */
1228 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1229 break;
1230 case DMA_TLB_DSI_FLUSH:
1231 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1232 break;
1233 case DMA_TLB_PSI_FLUSH:
1234 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1235 /* IH bit is passed in as part of address */
ba395927
KA
1236 val_iva = size_order | addr;
1237 break;
1238 default:
1239 BUG();
1240 }
1241 /* Note: set drain read/write */
1242#if 0
1243 /*
1244 * This is probably to be super secure.. Looks like we can
1245 * ignore it without any impact.
1246 */
1247 if (cap_read_drain(iommu->cap))
1248 val |= DMA_TLB_READ_DRAIN;
1249#endif
1250 if (cap_write_drain(iommu->cap))
1251 val |= DMA_TLB_WRITE_DRAIN;
1252
1f5b3c3f 1253 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1254 /* Note: Only uses first TLB reg currently */
1255 if (val_iva)
1256 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1257 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1258
1259 /* Make sure hardware complete it */
1260 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1261 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1262
1f5b3c3f 1263 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1264
1265 /* check IOTLB invalidation granularity */
1266 if (DMA_TLB_IAIG(val) == 0)
1267 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1268 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1269 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1270 (unsigned long long)DMA_TLB_IIRG(type),
1271 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1272}
1273
64ae892b
DW
1274static struct device_domain_info *
1275iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1276 u8 bus, u8 devfn)
93a23a72
YZ
1277{
1278 int found = 0;
1279 unsigned long flags;
1280 struct device_domain_info *info;
0bcb3e28 1281 struct pci_dev *pdev;
93a23a72
YZ
1282
1283 if (!ecap_dev_iotlb_support(iommu->ecap))
1284 return NULL;
1285
1286 if (!iommu->qi)
1287 return NULL;
1288
1289 spin_lock_irqsave(&device_domain_lock, flags);
1290 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1291 if (info->iommu == iommu && info->bus == bus &&
1292 info->devfn == devfn) {
93a23a72
YZ
1293 found = 1;
1294 break;
1295 }
1296 spin_unlock_irqrestore(&device_domain_lock, flags);
1297
0bcb3e28 1298 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1299 return NULL;
1300
0bcb3e28
DW
1301 pdev = to_pci_dev(info->dev);
1302
1303 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1304 return NULL;
1305
0bcb3e28 1306 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1307 return NULL;
1308
93a23a72
YZ
1309 return info;
1310}
1311
1312static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1313{
0bcb3e28 1314 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1315 return;
1316
0bcb3e28 1317 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1318}
1319
1320static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1321{
0bcb3e28
DW
1322 if (!info->dev || !dev_is_pci(info->dev) ||
1323 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1324 return;
1325
0bcb3e28 1326 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1327}
1328
1329static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1330 u64 addr, unsigned mask)
1331{
1332 u16 sid, qdep;
1333 unsigned long flags;
1334 struct device_domain_info *info;
1335
1336 spin_lock_irqsave(&device_domain_lock, flags);
1337 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1338 struct pci_dev *pdev;
1339 if (!info->dev || !dev_is_pci(info->dev))
1340 continue;
1341
1342 pdev = to_pci_dev(info->dev);
1343 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1344 continue;
1345
1346 sid = info->bus << 8 | info->devfn;
0bcb3e28 1347 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1348 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1349 }
1350 spin_unlock_irqrestore(&device_domain_lock, flags);
1351}
1352
1f0ef2aa 1353static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1354 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1355{
9dd2fe89 1356 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1357 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1358
ba395927
KA
1359 BUG_ON(pages == 0);
1360
ea8ea460
DW
1361 if (ih)
1362 ih = 1 << 6;
ba395927 1363 /*
9dd2fe89
YZ
1364 * Fallback to domain selective flush if no PSI support or the size is
1365 * too big.
ba395927
KA
1366 * PSI requires page size to be 2 ^ x, and the base address is naturally
1367 * aligned to the size
1368 */
9dd2fe89
YZ
1369 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1370 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1371 DMA_TLB_DSI_FLUSH);
9dd2fe89 1372 else
ea8ea460 1373 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1374 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1375
1376 /*
82653633
NA
1377 * In caching mode, changes of pages from non-present to present require
1378 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1379 */
82653633 1380 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1381 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1382}
1383
f8bab735 1384static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1385{
1386 u32 pmen;
1387 unsigned long flags;
1388
1f5b3c3f 1389 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1390 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1391 pmen &= ~DMA_PMEN_EPM;
1392 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1393
1394 /* wait for the protected region status bit to clear */
1395 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1396 readl, !(pmen & DMA_PMEN_PRS), pmen);
1397
1f5b3c3f 1398 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1399}
1400
2a41ccee 1401static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1402{
1403 u32 sts;
1404 unsigned long flags;
1405
1f5b3c3f 1406 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1407 iommu->gcmd |= DMA_GCMD_TE;
1408 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1409
1410 /* Make sure hardware complete it */
1411 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1412 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1413
1f5b3c3f 1414 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1415}
1416
2a41ccee 1417static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1418{
1419 u32 sts;
1420 unsigned long flag;
1421
1f5b3c3f 1422 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1423 iommu->gcmd &= ~DMA_GCMD_TE;
1424 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1425
1426 /* Make sure hardware complete it */
1427 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1428 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1429
1f5b3c3f 1430 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1431}
1432
3460a6d9 1433
ba395927
KA
1434static int iommu_init_domains(struct intel_iommu *iommu)
1435{
1436 unsigned long ndomains;
1437 unsigned long nlongs;
1438
1439 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1440 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1441 iommu->seq_id, ndomains);
ba395927
KA
1442 nlongs = BITS_TO_LONGS(ndomains);
1443
94a91b50
DD
1444 spin_lock_init(&iommu->lock);
1445
ba395927
KA
1446 /* TBD: there might be 64K domains,
1447 * consider other allocation for future chip
1448 */
1449 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1450 if (!iommu->domain_ids) {
852bdb04
JL
1451 pr_err("IOMMU%d: allocating domain id array failed\n",
1452 iommu->seq_id);
ba395927
KA
1453 return -ENOMEM;
1454 }
1455 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1456 GFP_KERNEL);
1457 if (!iommu->domains) {
852bdb04
JL
1458 pr_err("IOMMU%d: allocating domain array failed\n",
1459 iommu->seq_id);
1460 kfree(iommu->domain_ids);
1461 iommu->domain_ids = NULL;
ba395927
KA
1462 return -ENOMEM;
1463 }
1464
1465 /*
1466 * if Caching mode is set, then invalid translations are tagged
1467 * with domainid 0. Hence we need to pre-allocate it.
1468 */
1469 if (cap_caching_mode(iommu->cap))
1470 set_bit(0, iommu->domain_ids);
1471 return 0;
1472}
ba395927 1473
ffebeb46 1474static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1475{
1476 struct dmar_domain *domain;
2a46ddf7 1477 int i;
ba395927 1478
94a91b50 1479 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1480 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1481 /*
1482 * Domain id 0 is reserved for invalid translation
1483 * if hardware supports caching mode.
1484 */
1485 if (cap_caching_mode(iommu->cap) && i == 0)
1486 continue;
1487
94a91b50
DD
1488 domain = iommu->domains[i];
1489 clear_bit(i, iommu->domain_ids);
129ad281
JL
1490 if (domain_detach_iommu(domain, iommu) == 0 &&
1491 !domain_type_is_vm(domain))
92d03cc8 1492 domain_exit(domain);
5e98c4b1 1493 }
ba395927
KA
1494 }
1495
1496 if (iommu->gcmd & DMA_GCMD_TE)
1497 iommu_disable_translation(iommu);
ffebeb46 1498}
ba395927 1499
ffebeb46
JL
1500static void free_dmar_iommu(struct intel_iommu *iommu)
1501{
1502 if ((iommu->domains) && (iommu->domain_ids)) {
1503 kfree(iommu->domains);
1504 kfree(iommu->domain_ids);
1505 iommu->domains = NULL;
1506 iommu->domain_ids = NULL;
1507 }
ba395927 1508
d9630fe9
WH
1509 g_iommus[iommu->seq_id] = NULL;
1510
ba395927
KA
1511 /* free context mapping */
1512 free_context_table(iommu);
ba395927
KA
1513}
1514
ab8dfe25 1515static struct dmar_domain *alloc_domain(int flags)
ba395927 1516{
92d03cc8
JL
1517 /* domain id for virtual machine, it won't be set in context */
1518 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1519 struct dmar_domain *domain;
ba395927
KA
1520
1521 domain = alloc_domain_mem();
1522 if (!domain)
1523 return NULL;
1524
ab8dfe25 1525 memset(domain, 0, sizeof(*domain));
4c923d47 1526 domain->nid = -1;
ab8dfe25 1527 domain->flags = flags;
92d03cc8
JL
1528 spin_lock_init(&domain->iommu_lock);
1529 INIT_LIST_HEAD(&domain->devices);
ab8dfe25 1530 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
92d03cc8 1531 domain->id = atomic_inc_return(&vm_domid);
2c2e2c38
FY
1532
1533 return domain;
1534}
1535
fb170fb4
JL
1536static int __iommu_attach_domain(struct dmar_domain *domain,
1537 struct intel_iommu *iommu)
2c2e2c38
FY
1538{
1539 int num;
1540 unsigned long ndomains;
2c2e2c38 1541
ba395927 1542 ndomains = cap_ndoms(iommu->cap);
ba395927 1543 num = find_first_zero_bit(iommu->domain_ids, ndomains);
fb170fb4
JL
1544 if (num < ndomains) {
1545 set_bit(num, iommu->domain_ids);
1546 iommu->domains[num] = domain;
1547 } else {
1548 num = -ENOSPC;
ba395927
KA
1549 }
1550
fb170fb4
JL
1551 return num;
1552}
1553
1554static int iommu_attach_domain(struct dmar_domain *domain,
1555 struct intel_iommu *iommu)
1556{
1557 int num;
1558 unsigned long flags;
1559
1560 spin_lock_irqsave(&iommu->lock, flags);
1561 num = __iommu_attach_domain(domain, iommu);
44bde614 1562 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4
JL
1563 if (num < 0)
1564 pr_err("IOMMU: no free domain ids\n");
ba395927 1565
fb170fb4 1566 return num;
ba395927
KA
1567}
1568
44bde614
JL
1569static int iommu_attach_vm_domain(struct dmar_domain *domain,
1570 struct intel_iommu *iommu)
1571{
1572 int num;
1573 unsigned long ndomains;
1574
1575 ndomains = cap_ndoms(iommu->cap);
1576 for_each_set_bit(num, iommu->domain_ids, ndomains)
1577 if (iommu->domains[num] == domain)
1578 return num;
1579
1580 return __iommu_attach_domain(domain, iommu);
1581}
1582
2c2e2c38
FY
1583static void iommu_detach_domain(struct dmar_domain *domain,
1584 struct intel_iommu *iommu)
ba395927
KA
1585{
1586 unsigned long flags;
2c2e2c38 1587 int num, ndomains;
ba395927 1588
8c11e798 1589 spin_lock_irqsave(&iommu->lock, flags);
fb170fb4
JL
1590 if (domain_type_is_vm_or_si(domain)) {
1591 ndomains = cap_ndoms(iommu->cap);
1592 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1593 if (iommu->domains[num] == domain) {
1594 clear_bit(num, iommu->domain_ids);
1595 iommu->domains[num] = NULL;
1596 break;
1597 }
2c2e2c38 1598 }
fb170fb4
JL
1599 } else {
1600 clear_bit(domain->id, iommu->domain_ids);
1601 iommu->domains[domain->id] = NULL;
2c2e2c38 1602 }
8c11e798 1603 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1604}
1605
fb170fb4
JL
1606static void domain_attach_iommu(struct dmar_domain *domain,
1607 struct intel_iommu *iommu)
1608{
1609 unsigned long flags;
1610
1611 spin_lock_irqsave(&domain->iommu_lock, flags);
1612 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1613 domain->iommu_count++;
1614 if (domain->iommu_count == 1)
1615 domain->nid = iommu->node;
1616 domain_update_iommu_cap(domain);
1617 }
1618 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1619}
1620
1621static int domain_detach_iommu(struct dmar_domain *domain,
1622 struct intel_iommu *iommu)
1623{
1624 unsigned long flags;
1625 int count = INT_MAX;
1626
1627 spin_lock_irqsave(&domain->iommu_lock, flags);
1628 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1629 count = --domain->iommu_count;
1630 domain_update_iommu_cap(domain);
1631 }
1632 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1633
1634 return count;
1635}
1636
ba395927 1637static struct iova_domain reserved_iova_list;
8a443df4 1638static struct lock_class_key reserved_rbtree_key;
ba395927 1639
51a63e67 1640static int dmar_init_reserved_ranges(void)
ba395927
KA
1641{
1642 struct pci_dev *pdev = NULL;
1643 struct iova *iova;
1644 int i;
ba395927 1645
f661197e 1646 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1647
8a443df4
MG
1648 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1649 &reserved_rbtree_key);
1650
ba395927
KA
1651 /* IOAPIC ranges shouldn't be accessed by DMA */
1652 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1653 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1654 if (!iova) {
ba395927 1655 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1656 return -ENODEV;
1657 }
ba395927
KA
1658
1659 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1660 for_each_pci_dev(pdev) {
1661 struct resource *r;
1662
1663 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1664 r = &pdev->resource[i];
1665 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1666 continue;
1a4a4551
DW
1667 iova = reserve_iova(&reserved_iova_list,
1668 IOVA_PFN(r->start),
1669 IOVA_PFN(r->end));
51a63e67 1670 if (!iova) {
ba395927 1671 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1672 return -ENODEV;
1673 }
ba395927
KA
1674 }
1675 }
51a63e67 1676 return 0;
ba395927
KA
1677}
1678
1679static void domain_reserve_special_ranges(struct dmar_domain *domain)
1680{
1681 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1682}
1683
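/*
 * Round a guest address width up to one the page-table format can express:
 * 12 bits of page offset plus a whole number of 9-bit levels, capped at 64.
 * E.g. a gaw of 48 stays 48, while a gaw of 35 is rounded up to 39.
 */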
1684static inline int guestwidth_to_adjustwidth(int gaw)
1685{
1686 int agaw;
1687 int r = (gaw - 12) % 9;
1688
1689 if (r == 0)
1690 agaw = gaw;
1691 else
1692 agaw = gaw + 9 - r;
1693 if (agaw > 64)
1694 agaw = 64;
1695 return agaw;
1696}
1697
1698static int domain_init(struct dmar_domain *domain, int guest_width)
1699{
1700 struct intel_iommu *iommu;
1701 int adjust_width, agaw;
1702 unsigned long sagaw;
1703
f661197e 1704 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1705 domain_reserve_special_ranges(domain);
1706
1707 /* calculate AGAW */
8c11e798 1708 iommu = domain_get_iommu(domain);
ba395927
KA
1709 if (guest_width > cap_mgaw(iommu->cap))
1710 guest_width = cap_mgaw(iommu->cap);
1711 domain->gaw = guest_width;
1712 adjust_width = guestwidth_to_adjustwidth(guest_width);
1713 agaw = width_to_agaw(adjust_width);
1714 sagaw = cap_sagaw(iommu->cap);
1715 if (!test_bit(agaw, &sagaw)) {
1716 /* hardware doesn't support it, choose a bigger one */
1717 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1718 agaw = find_next_bit(&sagaw, 5, agaw);
1719 if (agaw >= 5)
1720 return -ENODEV;
1721 }
1722 domain->agaw = agaw;
ba395927 1723
8e604097
WH
1724 if (ecap_coherent(iommu->ecap))
1725 domain->iommu_coherency = 1;
1726 else
1727 domain->iommu_coherency = 0;
1728
58c610bd
SY
1729 if (ecap_sc_support(iommu->ecap))
1730 domain->iommu_snooping = 1;
1731 else
1732 domain->iommu_snooping = 0;
1733
214e39aa
DW
1734 if (intel_iommu_superpage)
1735 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1736 else
1737 domain->iommu_superpage = 0;
1738
4c923d47 1739 domain->nid = iommu->node;
c7151a8d 1740
ba395927 1741 /* always allocate the top pgd */
4c923d47 1742 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1743 if (!domain->pgd)
1744 return -ENOMEM;
5b6985ce 1745 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1746 return 0;
1747}
1748
1749static void domain_exit(struct dmar_domain *domain)
1750{
2c2e2c38
FY
1751 struct dmar_drhd_unit *drhd;
1752 struct intel_iommu *iommu;
ea8ea460 1753 struct page *freelist = NULL;
ba395927
KA
1754
	/* Domain 0 is reserved, so don't process it */
1756 if (!domain)
1757 return;
1758
7b668357
AW
1759 /* Flush any lazy unmaps that may reference this domain */
1760 if (!intel_iommu_strict)
1761 flush_unmaps_timeout(0);
1762
92d03cc8 1763 /* remove associated devices */
ba395927 1764 domain_remove_dev_info(domain);
92d03cc8 1765
ba395927
KA
1766 /* destroy iovas */
1767 put_iova_domain(&domain->iovad);
ba395927 1768
ea8ea460 1769 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1770
92d03cc8 1771 /* clear attached or cached domains */
0e242612 1772 rcu_read_lock();
2c2e2c38 1773 for_each_active_iommu(iommu, drhd)
fb170fb4 1774 iommu_detach_domain(domain, iommu);
0e242612 1775 rcu_read_unlock();
2c2e2c38 1776
ea8ea460
DW
1777 dma_free_pagelist(freelist);
1778
ba395927
KA
1779 free_domain_mem(domain);
1780}
1781
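/*
 * Install a context entry for (bus, devfn) on @iommu so that DMA from the
 * device is translated through @domain's page tables (or passed through
 * untranslated for CONTEXT_TT_PASS_THROUGH), flushing the context and
 * IOTLB caches as caching-mode hardware requires.
 */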
64ae892b
DW
1782static int domain_context_mapping_one(struct dmar_domain *domain,
1783 struct intel_iommu *iommu,
1784 u8 bus, u8 devfn, int translation)
ba395927
KA
1785{
1786 struct context_entry *context;
ba395927 1787 unsigned long flags;
ea6606b0 1788 struct dma_pte *pgd;
ea6606b0
WH
1789 int id;
1790 int agaw;
93a23a72 1791 struct device_domain_info *info = NULL;
ba395927
KA
1792
1793 pr_debug("Set context mapping for %02x:%02x.%d\n",
1794 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1795
ba395927 1796 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1797 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1798 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1799
ba395927
KA
1800 context = device_to_context_entry(iommu, bus, devfn);
1801 if (!context)
1802 return -ENOMEM;
1803 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1804 if (context_present(context)) {
ba395927
KA
1805 spin_unlock_irqrestore(&iommu->lock, flags);
1806 return 0;
1807 }
1808
ea6606b0
WH
1809 id = domain->id;
1810 pgd = domain->pgd;
1811
ab8dfe25 1812 if (domain_type_is_vm_or_si(domain)) {
44bde614
JL
1813 if (domain_type_is_vm(domain)) {
1814 id = iommu_attach_vm_domain(domain, iommu);
fb170fb4 1815 if (id < 0) {
ea6606b0 1816 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4 1817 pr_err("IOMMU: no free domain ids\n");
ea6606b0
WH
1818 return -EFAULT;
1819 }
ea6606b0
WH
1820 }
1821
1822 /* Skip top levels of page tables for
1823 * an iommu which has less agaw than the default.
1672af11 1824 * Unnecessary for PT mode.
ea6606b0 1825 */
1672af11
CW
1826 if (translation != CONTEXT_TT_PASS_THROUGH) {
1827 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1828 pgd = phys_to_virt(dma_pte_addr(pgd));
1829 if (!dma_pte_present(pgd)) {
1830 spin_unlock_irqrestore(&iommu->lock, flags);
1831 return -ENOMEM;
1832 }
ea6606b0
WH
1833 }
1834 }
1835 }
1836
1837 context_set_domain_id(context, id);
4ed0d3e6 1838
93a23a72 1839 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1840 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1841 translation = info ? CONTEXT_TT_DEV_IOTLB :
1842 CONTEXT_TT_MULTI_LEVEL;
1843 }
4ed0d3e6
FY
1844 /*
1845 * In pass through mode, AW must be programmed to indicate the largest
1846 * AGAW value supported by hardware. And ASR is ignored by hardware.
1847 */
93a23a72 1848 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1849 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1850 else {
1851 context_set_address_root(context, virt_to_phys(pgd));
1852 context_set_address_width(context, iommu->agaw);
1853 }
4ed0d3e6
FY
1854
1855 context_set_translation_type(context, translation);
c07e7d21
MM
1856 context_set_fault_enable(context);
1857 context_set_present(context);
5331fe6f 1858 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1859
4c25a2c1
DW
1860 /*
1861 * It's a non-present to present mapping. If hardware doesn't cache
1862 * non-present entries we only need to flush the write-buffer. If it
1863 * _does_ cache non-present entries, then it does so in the special
1864 * domain #0, which we have to flush:
1865 */
1866 if (cap_caching_mode(iommu->cap)) {
1867 iommu->flush.flush_context(iommu, 0,
1868 (((u16)bus) << 8) | devfn,
1869 DMA_CCMD_MASK_NOBIT,
1870 DMA_CCMD_DEVICE_INVL);
18fd779a 1871 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1872 } else {
ba395927 1873 iommu_flush_write_buffer(iommu);
4c25a2c1 1874 }
93a23a72 1875 iommu_enable_dev_iotlb(info);
ba395927 1876 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 1877
fb170fb4
JL
1878 domain_attach_iommu(domain, iommu);
1879
ba395927
KA
1880 return 0;
1881}
1882
579305f7
AW
1883struct domain_context_mapping_data {
1884 struct dmar_domain *domain;
1885 struct intel_iommu *iommu;
1886 int translation;
1887};
1888
1889static int domain_context_mapping_cb(struct pci_dev *pdev,
1890 u16 alias, void *opaque)
1891{
1892 struct domain_context_mapping_data *data = opaque;
1893
1894 return domain_context_mapping_one(data->domain, data->iommu,
1895 PCI_BUS_NUM(alias), alias & 0xff,
1896 data->translation);
1897}
1898
ba395927 1899static int
e1f167f3
DW
1900domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1901 int translation)
ba395927 1902{
64ae892b 1903 struct intel_iommu *iommu;
156baca8 1904 u8 bus, devfn;
579305f7 1905 struct domain_context_mapping_data data;
64ae892b 1906
e1f167f3 1907 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1908 if (!iommu)
1909 return -ENODEV;
ba395927 1910
579305f7
AW
1911 if (!dev_is_pci(dev))
1912 return domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1913 translation);
579305f7
AW
1914
1915 data.domain = domain;
1916 data.iommu = iommu;
1917 data.translation = translation;
1918
1919 return pci_for_each_dma_alias(to_pci_dev(dev),
1920 &domain_context_mapping_cb, &data);
1921}
1922
1923static int domain_context_mapped_cb(struct pci_dev *pdev,
1924 u16 alias, void *opaque)
1925{
1926 struct intel_iommu *iommu = opaque;
1927
1928 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
1929}
1930
e1f167f3 1931static int domain_context_mapped(struct device *dev)
ba395927 1932{
5331fe6f 1933 struct intel_iommu *iommu;
156baca8 1934 u8 bus, devfn;
5331fe6f 1935
e1f167f3 1936 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1937 if (!iommu)
1938 return -ENODEV;
ba395927 1939
579305f7
AW
1940 if (!dev_is_pci(dev))
1941 return device_context_mapped(iommu, bus, devfn);
e1f167f3 1942
579305f7
AW
1943 return !pci_for_each_dma_alias(to_pci_dev(dev),
1944 domain_context_mapped_cb, iommu);
ba395927
KA
1945}
1946
f532959b
FY
1947/* Returns a number of VTD pages, but aligned to MM page size */
1948static inline unsigned long aligned_nrpages(unsigned long host_addr,
1949 size_t size)
1950{
1951 host_addr &= ~PAGE_MASK;
1952 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1953}
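/*
 * Editorial example (not part of the original source), assuming 4KiB
 * pages on both the MM and the VT-d side: for a 6000-byte buffer that
 * starts at offset 0x800 into a page, only the in-page offset survives
 * "host_addr &= ~PAGE_MASK", so the result is
 * PAGE_ALIGN(0x800 + 6000) >> VTD_PAGE_SHIFT == 0x2000 >> 12 == 2
 * VT-d pages.
 */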
1954
6dd9a7c7
YS
1955/* Return largest possible superpage level for a given mapping */
1956static inline int hardware_largepage_caps(struct dmar_domain *domain,
1957 unsigned long iov_pfn,
1958 unsigned long phy_pfn,
1959 unsigned long pages)
1960{
1961 int support, level = 1;
1962 unsigned long pfnmerge;
1963
1964 support = domain->iommu_superpage;
1965
1966 /* To use a large page, the virtual *and* physical addresses
1967 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1968 of them will mean we have to use smaller pages. So just
1969 merge them and check both at once. */
1970 pfnmerge = iov_pfn | phy_pfn;
1971
1972 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1973 pages >>= VTD_STRIDE_SHIFT;
1974 if (!pages)
1975 break;
1976 pfnmerge >>= VTD_STRIDE_SHIFT;
1977 level++;
1978 support--;
1979 }
1980 return level;
1981}
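/*
 * Editorial example (not part of the original source), assuming the
 * usual 9-bit stride: with domain->iommu_superpage >= 1, both iov_pfn
 * and phy_pfn 2MiB-aligned (their merged low 9 bits are zero, so the
 * VTD_STRIDE_MASK test passes) and at least 512 4KiB pages to map,
 * the loop returns level 2 and a 2MiB superpage can be used.  With
 * only 511 pages, "pages >>= VTD_STRIDE_SHIFT" reaches zero first and
 * the function falls back to level 1, i.e. ordinary 4KiB mappings.
 */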
1982
9051aa02
DW
1983static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1984 struct scatterlist *sg, unsigned long phys_pfn,
1985 unsigned long nr_pages, int prot)
e1605495
DW
1986{
1987 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1988 phys_addr_t uninitialized_var(pteval);
cc4f14aa 1989 unsigned long sg_res = 0;
6dd9a7c7
YS
1990 unsigned int largepage_lvl = 0;
1991 unsigned long lvl_pages = 0;
e1605495 1992
162d1b10 1993 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
1994
1995 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1996 return -EINVAL;
1997
1998 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1999
cc4f14aa
JL
2000 if (!sg) {
2001 sg_res = nr_pages;
9051aa02
DW
2002 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2003 }
2004
6dd9a7c7 2005 while (nr_pages > 0) {
c85994e4
DW
2006 uint64_t tmp;
2007
e1605495 2008 if (!sg_res) {
f532959b 2009 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2010 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2011 sg->dma_length = sg->length;
2012 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2013 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2014 }
6dd9a7c7 2015
e1605495 2016 if (!pte) {
6dd9a7c7
YS
2017 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2018
5cf0a76f 2019 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2020 if (!pte)
2021 return -ENOMEM;
6dd9a7c7 2022 /* It is a large page */
6491d4d0 2023 if (largepage_lvl > 1) {
6dd9a7c7 2024 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb
JL
2025 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2026 /*
2027 * Ensure that old small page tables are
2028 * removed to make room for superpage,
2029 * if they exist.
2030 */
6491d4d0 2031 dma_pte_free_pagetable(domain, iov_pfn,
d41a4adb 2032 iov_pfn + lvl_pages - 1);
6491d4d0 2033 } else {
6dd9a7c7 2034 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2035 }
6dd9a7c7 2036
e1605495
DW
2037 }
2038 /* We don't need a lock here; nobody else
2039 * touches the iova range
2040 */
7766a3fb 2041 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2042 if (tmp) {
1bf20f0d 2043 static int dumps = 5;
c85994e4
DW
2044 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2045 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2046 if (dumps) {
2047 dumps--;
2048 debug_dma_dump_mappings(NULL);
2049 }
2050 WARN_ON(1);
2051 }
6dd9a7c7
YS
2052
2053 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2054
2055 BUG_ON(nr_pages < lvl_pages);
2056 BUG_ON(sg_res < lvl_pages);
2057
2058 nr_pages -= lvl_pages;
2059 iov_pfn += lvl_pages;
2060 phys_pfn += lvl_pages;
2061 pteval += lvl_pages * VTD_PAGE_SIZE;
2062 sg_res -= lvl_pages;
2063
2064 /* If the next PTE would be the first in a new page, then we
2065 need to flush the cache on the entries we've just written.
2066 And then we'll need to recalculate 'pte', so clear it and
2067 let it get set again in the if (!pte) block above.
2068
2069 If we're done (!nr_pages) we need to flush the cache too.
2070
2071 Also if we've been setting superpages, we may need to
2072 recalculate 'pte' and switch back to smaller pages for the
2073 end of the mapping, if the trailing size is not enough to
2074 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2075 pte++;
6dd9a7c7
YS
2076 if (!nr_pages || first_pte_in_page(pte) ||
2077 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2078 domain_flush_cache(domain, first_pte,
2079 (void *)pte - (void *)first_pte);
2080 pte = NULL;
2081 }
6dd9a7c7
YS
2082
2083 if (!sg_res && nr_pages)
e1605495
DW
2084 sg = sg_next(sg);
2085 }
2086 return 0;
2087}
2088
9051aa02
DW
2089static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2090 struct scatterlist *sg, unsigned long nr_pages,
2091 int prot)
ba395927 2092{
9051aa02
DW
2093 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2094}
6f6a00e4 2095
9051aa02
DW
2096static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2097 unsigned long phys_pfn, unsigned long nr_pages,
2098 int prot)
2099{
2100 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2101}
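/*
 * Editorial sketch (not part of the original source): both wrappers
 * above funnel into __domain_mapping(), which either walks a
 * scatterlist or maps one physically contiguous pfn range.  A caller
 * mapping 16 contiguous 4KiB pages read/write at IOVA pfn 0x10000
 * would, roughly, do:
 *
 *	domain_pfn_mapping(domain, 0x10000, phys_pfn, 16,
 *			   DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * which mirrors what __intel_map_single() below does with the pfns it
 * derives from the allocated iova and the buffer's physical address.
 */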
2102
c7151a8d 2103static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2104{
c7151a8d
WH
2105 if (!iommu)
2106 return;
8c11e798
WH
2107
2108 clear_context_table(iommu, bus, devfn);
2109 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2110 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2111 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2112}
2113
109b9b04
DW
2114static inline void unlink_domain_info(struct device_domain_info *info)
2115{
2116 assert_spin_locked(&device_domain_lock);
2117 list_del(&info->link);
2118 list_del(&info->global);
2119 if (info->dev)
0bcb3e28 2120 info->dev->archdata.iommu = NULL;
109b9b04
DW
2121}
2122
ba395927
KA
2123static void domain_remove_dev_info(struct dmar_domain *domain)
2124{
3a74ca01 2125 struct device_domain_info *info, *tmp;
fb170fb4 2126 unsigned long flags;
ba395927
KA
2127
2128 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2129 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2130 unlink_domain_info(info);
ba395927
KA
2131 spin_unlock_irqrestore(&device_domain_lock, flags);
2132
93a23a72 2133 iommu_disable_dev_iotlb(info);
7c7faa11 2134 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2135
ab8dfe25 2136 if (domain_type_is_vm(domain)) {
7c7faa11 2137 iommu_detach_dependent_devices(info->iommu, info->dev);
fb170fb4 2138 domain_detach_iommu(domain, info->iommu);
92d03cc8
JL
2139 }
2140
2141 free_devinfo_mem(info);
ba395927
KA
2142 spin_lock_irqsave(&device_domain_lock, flags);
2143 }
2144 spin_unlock_irqrestore(&device_domain_lock, flags);
2145}
2146
2147/*
2148 * find_domain
1525a29a 2149 * Note: we use struct device->archdata.iommu to store the info
ba395927 2150 */
1525a29a 2151static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2152{
2153 struct device_domain_info *info;
2154
2155 /* No lock here, assumes no domain exit in normal case */
1525a29a 2156 info = dev->archdata.iommu;
ba395927
KA
2157 if (info)
2158 return info->domain;
2159 return NULL;
2160}
2161
5a8f40e8 2162static inline struct device_domain_info *
745f2586
JL
2163dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2164{
2165 struct device_domain_info *info;
2166
2167 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2168 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2169 info->devfn == devfn)
5a8f40e8 2170 return info;
745f2586
JL
2171
2172 return NULL;
2173}
2174
5a8f40e8 2175static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2176 int bus, int devfn,
b718cd3d
DW
2177 struct device *dev,
2178 struct dmar_domain *domain)
745f2586 2179{
5a8f40e8 2180 struct dmar_domain *found = NULL;
745f2586
JL
2181 struct device_domain_info *info;
2182 unsigned long flags;
2183
2184 info = alloc_devinfo_mem();
2185 if (!info)
b718cd3d 2186 return NULL;
745f2586 2187
745f2586
JL
2188 info->bus = bus;
2189 info->devfn = devfn;
2190 info->dev = dev;
2191 info->domain = domain;
5a8f40e8 2192 info->iommu = iommu;
745f2586
JL
2193
2194 spin_lock_irqsave(&device_domain_lock, flags);
2195 if (dev)
0bcb3e28 2196 found = find_domain(dev);
5a8f40e8
DW
2197 else {
2198 struct device_domain_info *info2;
41e80dca 2199 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2200 if (info2)
2201 found = info2->domain;
2202 }
745f2586
JL
2203 if (found) {
2204 spin_unlock_irqrestore(&device_domain_lock, flags);
2205 free_devinfo_mem(info);
b718cd3d
DW
2206 /* Caller must free the original domain */
2207 return found;
745f2586
JL
2208 }
2209
b718cd3d
DW
2210 list_add(&info->link, &domain->devices);
2211 list_add(&info->global, &device_domain_list);
2212 if (dev)
2213 dev->archdata.iommu = info;
2214 spin_unlock_irqrestore(&device_domain_lock, flags);
2215
2216 return domain;
745f2586
JL
2217}
2218
579305f7
AW
2219static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2220{
2221 *(u16 *)opaque = alias;
2222 return 0;
2223}
2224
ba395927 2225/* domain is initialized */
146922ec 2226static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2227{
579305f7
AW
2228 struct dmar_domain *domain, *tmp;
2229 struct intel_iommu *iommu;
5a8f40e8 2230 struct device_domain_info *info;
579305f7 2231 u16 dma_alias;
ba395927 2232 unsigned long flags;
aa4d066a 2233 u8 bus, devfn;
ba395927 2234
146922ec 2235 domain = find_domain(dev);
ba395927
KA
2236 if (domain)
2237 return domain;
2238
579305f7
AW
2239 iommu = device_to_iommu(dev, &bus, &devfn);
2240 if (!iommu)
2241 return NULL;
2242
146922ec
DW
2243 if (dev_is_pci(dev)) {
2244 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2245
579305f7
AW
2246 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2247
2248 spin_lock_irqsave(&device_domain_lock, flags);
2249 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2250 PCI_BUS_NUM(dma_alias),
2251 dma_alias & 0xff);
2252 if (info) {
2253 iommu = info->iommu;
2254 domain = info->domain;
5a8f40e8 2255 }
579305f7 2256 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2257
579305f7
AW
2258 /* DMA alias already has a domain, use it */
2259 if (info)
2260 goto found_domain;
2261 }
ba395927 2262
146922ec 2263 /* Allocate and initialize new domain for the device */
ab8dfe25 2264 domain = alloc_domain(0);
745f2586 2265 if (!domain)
579305f7 2266 return NULL;
44bde614
JL
2267 domain->id = iommu_attach_domain(domain, iommu);
2268 if (domain->id < 0) {
2fe9723d 2269 free_domain_mem(domain);
579305f7 2270 return NULL;
2c2e2c38 2271 }
fb170fb4 2272 domain_attach_iommu(domain, iommu);
579305f7
AW
2273 if (domain_init(domain, gaw)) {
2274 domain_exit(domain);
2275 return NULL;
2c2e2c38 2276 }
ba395927 2277
579305f7
AW
2278 /* register PCI DMA alias device */
2279 if (dev_is_pci(dev)) {
2280 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2281 dma_alias & 0xff, NULL, domain);
2282
2283 if (!tmp || tmp != domain) {
2284 domain_exit(domain);
2285 domain = tmp;
2286 }
2287
b718cd3d 2288 if (!domain)
579305f7 2289 return NULL;
ba395927
KA
2290 }
2291
2292found_domain:
579305f7
AW
2293 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2294
2295 if (!tmp || tmp != domain) {
2296 domain_exit(domain);
2297 domain = tmp;
2298 }
b718cd3d
DW
2299
2300 return domain;
ba395927
KA
2301}
2302
2c2e2c38 2303static int iommu_identity_mapping;
e0fc7e0b
DW
2304#define IDENTMAP_ALL 1
2305#define IDENTMAP_GFX 2
2306#define IDENTMAP_AZALIA 4
2c2e2c38 2307
b213203e
DW
2308static int iommu_domain_identity_map(struct dmar_domain *domain,
2309 unsigned long long start,
2310 unsigned long long end)
ba395927 2311{
c5395d5c
DW
2312 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2313 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2314
2315 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2316 dma_to_mm_pfn(last_vpfn))) {
ba395927 2317 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2318 return -ENOMEM;
ba395927
KA
2319 }
2320
c5395d5c
DW
2321 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2322 start, end, domain->id);
ba395927
KA
2323 /*
2324 * RMRR range might have overlap with physical memory range,
2325 * clear it first
2326 */
c5395d5c 2327 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2328
c5395d5c
DW
2329 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2330 last_vpfn - first_vpfn + 1,
61df7443 2331 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2332}
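/*
 * Editorial example (not part of the original source): for an identity
 * mapping of 0x0 - 0xffffff (the first 16MiB, as requested by the
 * ISA/floppy workaround below), first_vpfn is 0x0 and last_vpfn is
 * 0xfff, so 0x1000 pages are mapped with IOVA pfn equal to physical
 * pfn (possibly coalesced into superpages by __domain_mapping()).
 */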
2333
0b9d9753 2334static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2335 unsigned long long start,
2336 unsigned long long end)
2337{
2338 struct dmar_domain *domain;
2339 int ret;
2340
0b9d9753 2341 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2342 if (!domain)
2343 return -ENOMEM;
2344
19943b0e
DW
2345 /* For _hardware_ passthrough, don't bother. But for software
2346 passthrough, we do it anyway -- it may indicate a memory
2347 range which is reserved in E820, and so didn't get set
2348 up to start with in si_domain */
2349 if (domain == si_domain && hw_pass_through) {
2350 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2351 dev_name(dev), start, end);
19943b0e
DW
2352 return 0;
2353 }
2354
2355 printk(KERN_INFO
2356 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2357 dev_name(dev), start, end);
2ff729f5 2358
5595b528
DW
2359 if (end < start) {
2360 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2361 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2362 dmi_get_system_info(DMI_BIOS_VENDOR),
2363 dmi_get_system_info(DMI_BIOS_VERSION),
2364 dmi_get_system_info(DMI_PRODUCT_VERSION));
2365 ret = -EIO;
2366 goto error;
2367 }
2368
2ff729f5
DW
2369 if (end >> agaw_to_width(domain->agaw)) {
2370 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2371 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2372 agaw_to_width(domain->agaw),
2373 dmi_get_system_info(DMI_BIOS_VENDOR),
2374 dmi_get_system_info(DMI_BIOS_VERSION),
2375 dmi_get_system_info(DMI_PRODUCT_VERSION));
2376 ret = -EIO;
2377 goto error;
2378 }
19943b0e 2379
b213203e 2380 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2381 if (ret)
2382 goto error;
2383
2384 /* context entry init */
0b9d9753 2385 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2386 if (ret)
2387 goto error;
2388
2389 return 0;
2390
2391 error:
ba395927
KA
2392 domain_exit(domain);
2393 return ret;
ba395927
KA
2394}
2395
2396static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2397 struct device *dev)
ba395927 2398{
0b9d9753 2399 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2400 return 0;
0b9d9753
DW
2401 return iommu_prepare_identity_map(dev, rmrr->base_address,
2402 rmrr->end_address);
ba395927
KA
2403}
2404
d3f13810 2405#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2406static inline void iommu_prepare_isa(void)
2407{
2408 struct pci_dev *pdev;
2409 int ret;
2410
2411 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2412 if (!pdev)
2413 return;
2414
c7ab48d2 2415 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2416 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2417
2418 if (ret)
c7ab48d2
DW
2419 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2420 "floppy might not work\n");
49a0429e 2421
9b27e82d 2422 pci_dev_put(pdev);
49a0429e
KA
2423}
2424#else
2425static inline void iommu_prepare_isa(void)
2426{
2427 return;
2428}
d3f13810 2429#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2430
2c2e2c38 2431static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2432
071e1374 2433static int __init si_domain_init(int hw)
2c2e2c38
FY
2434{
2435 struct dmar_drhd_unit *drhd;
2436 struct intel_iommu *iommu;
c7ab48d2 2437 int nid, ret = 0;
44bde614 2438 bool first = true;
2c2e2c38 2439
ab8dfe25 2440 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2441 if (!si_domain)
2442 return -EFAULT;
2443
2c2e2c38
FY
2444 for_each_active_iommu(iommu, drhd) {
2445 ret = iommu_attach_domain(si_domain, iommu);
fb170fb4 2446 if (ret < 0) {
2c2e2c38
FY
2447 domain_exit(si_domain);
2448 return -EFAULT;
44bde614
JL
2449 } else if (first) {
2450 si_domain->id = ret;
2451 first = false;
2452 } else if (si_domain->id != ret) {
2453 domain_exit(si_domain);
2454 return -EFAULT;
2c2e2c38 2455 }
fb170fb4 2456 domain_attach_iommu(si_domain, iommu);
2c2e2c38
FY
2457 }
2458
2459 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2460 domain_exit(si_domain);
2461 return -EFAULT;
2462 }
2463
9544c003
JL
2464 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2465 si_domain->id);
2c2e2c38 2466
19943b0e
DW
2467 if (hw)
2468 return 0;
2469
c7ab48d2 2470 for_each_online_node(nid) {
5dfe8660
TH
2471 unsigned long start_pfn, end_pfn;
2472 int i;
2473
2474 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2475 ret = iommu_domain_identity_map(si_domain,
2476 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2477 if (ret)
2478 return ret;
2479 }
c7ab48d2
DW
2480 }
2481
2c2e2c38
FY
2482 return 0;
2483}
2484
9b226624 2485static int identity_mapping(struct device *dev)
2c2e2c38
FY
2486{
2487 struct device_domain_info *info;
2488
2489 if (likely(!iommu_identity_mapping))
2490 return 0;
2491
9b226624 2492 info = dev->archdata.iommu;
cb452a40
MT
2493 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2494 return (info->domain == si_domain);
2c2e2c38 2495
2c2e2c38
FY
2496 return 0;
2497}
2498
2499static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2500 struct device *dev, int translation)
2c2e2c38 2501{
0ac72664 2502 struct dmar_domain *ndomain;
5a8f40e8 2503 struct intel_iommu *iommu;
156baca8 2504 u8 bus, devfn;
5fe60f4e 2505 int ret;
2c2e2c38 2506
5913c9bf 2507 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2508 if (!iommu)
2509 return -ENODEV;
2510
5913c9bf 2511 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2512 if (ndomain != domain)
2513 return -EBUSY;
2c2e2c38 2514
5913c9bf 2515 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2516 if (ret) {
5913c9bf 2517 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2518 return ret;
2519 }
2520
2c2e2c38
FY
2521 return 0;
2522}
2523
0b9d9753 2524static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2525{
2526 struct dmar_rmrr_unit *rmrr;
832bd858 2527 struct device *tmp;
ea2447f7
TM
2528 int i;
2529
0e242612 2530 rcu_read_lock();
ea2447f7 2531 for_each_rmrr_units(rmrr) {
b683b230
JL
2532 /*
2533 * Return TRUE if this RMRR contains the device that
2534 * is passed in.
2535 */
2536 for_each_active_dev_scope(rmrr->devices,
2537 rmrr->devices_cnt, i, tmp)
0b9d9753 2538 if (tmp == dev) {
0e242612 2539 rcu_read_unlock();
ea2447f7 2540 return true;
b683b230 2541 }
ea2447f7 2542 }
0e242612 2543 rcu_read_unlock();
ea2447f7
TM
2544 return false;
2545}
2546
c875d2c1
AW
2547/*
2548 * There are a couple of cases where we need to restrict the functionality of
2549 * devices associated with RMRRs. The first is when evaluating a device for
2550 * identity mapping because problems exist when devices are moved in and out
2551 * of domains and their respective RMRR information is lost. This means that
2552 * a device with associated RMRRs will never be in a "passthrough" domain.
2553 * The second is use of the device through the IOMMU API. This interface
2554 * expects to have full control of the IOVA space for the device. We cannot
2555 * satisfy both the requirement that RMRR access is maintained and have an
2556 * unencumbered IOVA space. We also have no ability to quiesce the device's
2557 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2558 * We therefore prevent devices associated with an RMRR from participating in
2559 * the IOMMU API, which eliminates them from device assignment.
2560 *
2561 * In both cases we assume that PCI USB devices with RMRRs have them largely
2562 * for historical reasons and that the RMRR space is not actively used post
2563 * boot. This exclusion may change if vendors begin to abuse it.
2564 */
2565static bool device_is_rmrr_locked(struct device *dev)
2566{
2567 if (!device_has_rmrr(dev))
2568 return false;
2569
2570 if (dev_is_pci(dev)) {
2571 struct pci_dev *pdev = to_pci_dev(dev);
2572
2573 if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
2574 return false;
2575 }
2576
2577 return true;
2578}
2579
3bdb2591 2580static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2581{
ea2447f7 2582
3bdb2591
DW
2583 if (dev_is_pci(dev)) {
2584 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2585
c875d2c1 2586 if (device_is_rmrr_locked(dev))
3bdb2591 2587 return 0;
e0fc7e0b 2588
3bdb2591
DW
2589 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2590 return 1;
e0fc7e0b 2591
3bdb2591
DW
2592 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2593 return 1;
6941af28 2594
3bdb2591 2595 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2596 return 0;
3bdb2591
DW
2597
2598 /*
2599 * We want to start off with all devices in the 1:1 domain, and
2600 * take them out later if we find they can't access all of memory.
2601 *
2602 * However, we can't do this for PCI devices behind bridges,
2603 * because all PCI devices behind the same bridge will end up
2604 * with the same source-id on their transactions.
2605 *
2606 * Practically speaking, we can't change things around for these
2607 * devices at run-time, because we can't be sure there'll be no
2608 * DMA transactions in flight for any of their siblings.
2609 *
2610 * So PCI devices (unless they're on the root bus) as well as
2611 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2612 * the 1:1 domain, just in _case_ one of their siblings turns out
2613 * not to be able to map all of memory.
2614 */
2615 if (!pci_is_pcie(pdev)) {
2616 if (!pci_is_root_bus(pdev->bus))
2617 return 0;
2618 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2619 return 0;
2620 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2621 return 0;
3bdb2591
DW
2622 } else {
2623 if (device_has_rmrr(dev))
2624 return 0;
2625 }
3dfc813d 2626
3bdb2591 2627 /*
3dfc813d 2628 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2629 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2630 * take them out of the 1:1 domain later.
2631 */
8fcc5372
CW
2632 if (!startup) {
2633 /*
2634 * If the device's dma_mask is less than the system's memory
2635 * size then this is not a candidate for identity mapping.
2636 */
3bdb2591 2637 u64 dma_mask = *dev->dma_mask;
8fcc5372 2638
3bdb2591
DW
2639 if (dev->coherent_dma_mask &&
2640 dev->coherent_dma_mask < dma_mask)
2641 dma_mask = dev->coherent_dma_mask;
8fcc5372 2642
3bdb2591 2643 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2644 }
6941af28
DW
2645
2646 return 1;
2647}
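/*
 * Editorial example (not part of the original source): at run time
 * (startup == 0), a device that only advertises a 32-bit DMA mask on
 * a machine whose memory needs more than 32 address bits fails the
 * "dma_mask >= dma_get_required_mask(dev)" test above, so it is kept
 * out of (or later moved out of) the identity domain and gets its own
 * translated IOVA space instead; see iommu_no_mapping() below.
 */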
2648
cf04eee8
DW
2649static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2650{
2651 int ret;
2652
2653 if (!iommu_should_identity_map(dev, 1))
2654 return 0;
2655
2656 ret = domain_add_dev_info(si_domain, dev,
2657 hw ? CONTEXT_TT_PASS_THROUGH :
2658 CONTEXT_TT_MULTI_LEVEL);
2659 if (!ret)
2660 pr_info("IOMMU: %s identity mapping for device %s\n",
2661 hw ? "hardware" : "software", dev_name(dev));
2662 else if (ret == -ENODEV)
2663 /* device not associated with an iommu */
2664 ret = 0;
2665
2666 return ret;
2667}
2668
2669
071e1374 2670static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2671{
2c2e2c38 2672 struct pci_dev *pdev = NULL;
cf04eee8
DW
2673 struct dmar_drhd_unit *drhd;
2674 struct intel_iommu *iommu;
2675 struct device *dev;
2676 int i;
2677 int ret = 0;
2c2e2c38 2678
19943b0e 2679 ret = si_domain_init(hw);
2c2e2c38
FY
2680 if (ret)
2681 return -EFAULT;
2682
2c2e2c38 2683 for_each_pci_dev(pdev) {
cf04eee8
DW
2684 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2685 if (ret)
2686 return ret;
2687 }
2688
2689 for_each_active_iommu(iommu, drhd)
2690 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2691 struct acpi_device_physical_node *pn;
2692 struct acpi_device *adev;
2693
2694 if (dev->bus != &acpi_bus_type)
2695 continue;
2696
2697 adev = to_acpi_device(dev);
2698 mutex_lock(&adev->physical_node_lock);
2699 list_for_each_entry(pn, &adev->physical_node_list, node) {
2700 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2701 if (ret)
2702 break;
eae460b6 2703 }
cf04eee8
DW
2704 mutex_unlock(&adev->physical_node_lock);
2705 if (ret)
2706 return ret;
62edf5dc 2707 }
2c2e2c38
FY
2708
2709 return 0;
2710}
2711
ffebeb46
JL
2712static void intel_iommu_init_qi(struct intel_iommu *iommu)
2713{
2714 /*
2715 * Start from a sane iommu hardware state.
2716 * If the queued invalidation is already initialized by us
2717 * (for example, while enabling interrupt-remapping) then
2718 * we got the things already rolling from a sane state.
2719 */
2720 if (!iommu->qi) {
2721 /*
2722 * Clear any previous faults.
2723 */
2724 dmar_fault(-1, iommu);
2725 /*
2726 * Disable queued invalidation if supported and already enabled
2727 * before OS handover.
2728 */
2729 dmar_disable_qi(iommu);
2730 }
2731
2732 if (dmar_enable_qi(iommu)) {
2733 /*
2734 * Queued Invalidate not enabled, use Register Based Invalidate
2735 */
2736 iommu->flush.flush_context = __iommu_flush_context;
2737 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2738 pr_info("IOMMU: %s using Register based invalidation\n",
2739 iommu->name);
2740 } else {
2741 iommu->flush.flush_context = qi_flush_context;
2742 iommu->flush.flush_iotlb = qi_flush_iotlb;
2743 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
2744 }
2745}
2746
b779260b 2747static int __init init_dmars(void)
ba395927
KA
2748{
2749 struct dmar_drhd_unit *drhd;
2750 struct dmar_rmrr_unit *rmrr;
832bd858 2751 struct device *dev;
ba395927 2752 struct intel_iommu *iommu;
9d783ba0 2753 int i, ret;
2c2e2c38 2754
ba395927
KA
2755 /*
2756 * for each drhd
2757 * allocate root
2758 * initialize and program root entry to not present
2759 * endfor
2760 */
2761 for_each_drhd_unit(drhd) {
5e0d2a6f 2762 /*
2763 * lock not needed as this is only incremented in the single
2764 * threaded kernel __init code path; all other accesses are read
2765 * only
2766 */
78d8e704 2767 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
2768 g_num_of_iommus++;
2769 continue;
2770 }
2771 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
78d8e704 2772 DMAR_UNITS_SUPPORTED);
5e0d2a6f 2773 }
2774
ffebeb46
JL
2775 /* Preallocate enough resources for IOMMU hot-addition */
2776 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
2777 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
2778
d9630fe9
WH
2779 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2780 GFP_KERNEL);
2781 if (!g_iommus) {
2782 printk(KERN_ERR "Allocating global iommu array failed\n");
2783 ret = -ENOMEM;
2784 goto error;
2785 }
2786
80b20dd8 2787 deferred_flush = kzalloc(g_num_of_iommus *
2788 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2789 if (!deferred_flush) {
5e0d2a6f 2790 ret = -ENOMEM;
989d51fc 2791 goto free_g_iommus;
5e0d2a6f 2792 }
2793
7c919779 2794 for_each_active_iommu(iommu, drhd) {
d9630fe9 2795 g_iommus[iommu->seq_id] = iommu;
ba395927 2796
e61d98d8
SS
2797 ret = iommu_init_domains(iommu);
2798 if (ret)
989d51fc 2799 goto free_iommu;
e61d98d8 2800
ba395927
KA
2801 /*
2802 * TBD:
2803 * we could share the same root & context tables
25985edc 2804 * among all IOMMUs. Need to split it later.
ba395927
KA
2805 */
2806 ret = iommu_alloc_root_entry(iommu);
ffebeb46 2807 if (ret)
989d51fc 2808 goto free_iommu;
4ed0d3e6 2809 if (!ecap_pass_through(iommu->ecap))
19943b0e 2810 hw_pass_through = 0;
ba395927
KA
2811 }
2812
ffebeb46
JL
2813 for_each_active_iommu(iommu, drhd)
2814 intel_iommu_init_qi(iommu);
a77b67d4 2815
19943b0e 2816 if (iommu_pass_through)
e0fc7e0b
DW
2817 iommu_identity_mapping |= IDENTMAP_ALL;
2818
d3f13810 2819#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2820 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2821#endif
e0fc7e0b
DW
2822
2823 check_tylersburg_isoch();
2824
ba395927 2825 /*
19943b0e
DW
2826 * If pass through is not set or not enabled, set up context entries for
2827 * identity mappings for rmrr, gfx, and isa and may fall back to static
2828 * identity mapping if iommu_identity_mapping is set.
ba395927 2829 */
19943b0e
DW
2830 if (iommu_identity_mapping) {
2831 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2832 if (ret) {
19943b0e 2833 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2834 goto free_iommu;
ba395927
KA
2835 }
2836 }
ba395927 2837 /*
19943b0e
DW
2838 * For each rmrr
2839 * for each dev attached to rmrr
2840 * do
2841 * locate drhd for dev, alloc domain for dev
2842 * allocate free domain
2843 * allocate page table entries for rmrr
2844 * if context not allocated for bus
2845 * allocate and init context
2846 * set present in root table for this bus
2847 * init context with domain, translation etc
2848 * endfor
2849 * endfor
ba395927 2850 */
19943b0e
DW
2851 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2852 for_each_rmrr_units(rmrr) {
b683b230
JL
2853 /* some BIOSes list non-existent devices in the DMAR table. */
2854 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2855 i, dev) {
0b9d9753 2856 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2857 if (ret)
2858 printk(KERN_ERR
2859 "IOMMU: mapping reserved region failed\n");
ba395927 2860 }
4ed0d3e6 2861 }
49a0429e 2862
19943b0e
DW
2863 iommu_prepare_isa();
2864
ba395927
KA
2865 /*
2866 * for each drhd
2867 * enable fault log
2868 * global invalidate context cache
2869 * global invalidate iotlb
2870 * enable translation
2871 */
7c919779 2872 for_each_iommu(iommu, drhd) {
51a63e67
JC
2873 if (drhd->ignored) {
2874 /*
2875 * we always have to disable PMRs or DMA may fail on
2876 * this device
2877 */
2878 if (force_on)
7c919779 2879 iommu_disable_protect_mem_regions(iommu);
ba395927 2880 continue;
51a63e67 2881 }
ba395927
KA
2882
2883 iommu_flush_write_buffer(iommu);
2884
3460a6d9
KA
2885 ret = dmar_set_interrupt(iommu);
2886 if (ret)
989d51fc 2887 goto free_iommu;
3460a6d9 2888
ba395927
KA
2889 iommu_set_root_entry(iommu);
2890
4c25a2c1 2891 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2892 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2a41ccee 2893 iommu_enable_translation(iommu);
b94996c9 2894 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2895 }
2896
2897 return 0;
989d51fc
JL
2898
2899free_iommu:
ffebeb46
JL
2900 for_each_active_iommu(iommu, drhd) {
2901 disable_dmar_iommu(iommu);
a868e6b7 2902 free_dmar_iommu(iommu);
ffebeb46 2903 }
9bdc531e 2904 kfree(deferred_flush);
989d51fc 2905free_g_iommus:
d9630fe9 2906 kfree(g_iommus);
989d51fc 2907error:
ba395927
KA
2908 return ret;
2909}
2910
5a5e02a6 2911/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2912static struct iova *intel_alloc_iova(struct device *dev,
2913 struct dmar_domain *domain,
2914 unsigned long nrpages, uint64_t dma_mask)
ba395927 2915{
ba395927 2916 struct iova *iova = NULL;
ba395927 2917
875764de
DW
2918 /* Restrict dma_mask to the width that the iommu can handle */
2919 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2920
2921 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2922 /*
2923 * First try to allocate an io virtual address in
284901a9 2924 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2925 * from higher range
ba395927 2926 */
875764de
DW
2927 iova = alloc_iova(&domain->iovad, nrpages,
2928 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2929 if (iova)
2930 return iova;
2931 }
2932 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2933 if (unlikely(!iova)) {
2934 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2935 nrpages, dev_name(dev));
f76aec76
KA
2936 return NULL;
2937 }
2938
2939 return iova;
2940}
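/*
 * Editorial note (not part of the original source): the allocator
 * above first clamps the caller's DMA mask to what the domain's
 * address width can express, then, unless intel_iommu=forcedac was
 * given, prefers an IOVA below 4GiB even for devices whose mask
 * allows more, and only retries with the full mask once the 32-bit
 * range is exhausted.
 */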
2941
d4b709f4 2942static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2943{
2944 struct dmar_domain *domain;
2945 int ret;
2946
d4b709f4 2947 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2948 if (!domain) {
d4b709f4
DW
2949 printk(KERN_ERR "Allocating domain for %s failed",
2950 dev_name(dev));
4fe05bbc 2951 return NULL;
ba395927
KA
2952 }
2953
2954 /* make sure context mapping is ok */
d4b709f4
DW
2955 if (unlikely(!domain_context_mapped(dev))) {
2956 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2957 if (ret) {
d4b709f4
DW
2958 printk(KERN_ERR "Domain context map for %s failed",
2959 dev_name(dev));
4fe05bbc 2960 return NULL;
f76aec76 2961 }
ba395927
KA
2962 }
2963
f76aec76
KA
2964 return domain;
2965}
2966
d4b709f4 2967static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2968{
2969 struct device_domain_info *info;
2970
2971 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2972 info = dev->archdata.iommu;
147202aa
DW
2973 if (likely(info))
2974 return info->domain;
2975
2976 return __get_valid_domain_for_dev(dev);
2977}
2978
3d89194a 2979static int iommu_dummy(struct device *dev)
2c2e2c38 2980{
3d89194a 2981 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2982}
2983
ecb509ec 2984/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 2985static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2986{
2987 int found;
2988
3d89194a 2989 if (iommu_dummy(dev))
1e4c64c4
DW
2990 return 1;
2991
2c2e2c38 2992 if (!iommu_identity_mapping)
1e4c64c4 2993 return 0;
2c2e2c38 2994
9b226624 2995 found = identity_mapping(dev);
2c2e2c38 2996 if (found) {
ecb509ec 2997 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
2998 return 1;
2999 else {
3000 /*
3001 * The 32 bit DMA device is removed from si_domain and falls back
3002 * to non-identity mapping.
3003 */
bf9c9eda 3004 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 3005 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 3006 dev_name(dev));
2c2e2c38
FY
3007 return 0;
3008 }
3009 } else {
3010 /*
3011 * In case of a 64 bit DMA device detached from a vm, the device
3012 * is put into si_domain for identity mapping.
3013 */
ecb509ec 3014 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3015 int ret;
5913c9bf 3016 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
3017 hw_pass_through ?
3018 CONTEXT_TT_PASS_THROUGH :
3019 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
3020 if (!ret) {
3021 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 3022 dev_name(dev));
2c2e2c38
FY
3023 return 1;
3024 }
3025 }
3026 }
3027
1e4c64c4 3028 return 0;
2c2e2c38
FY
3029}
3030
5040a918 3031static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3032 size_t size, int dir, u64 dma_mask)
f76aec76 3033{
f76aec76 3034 struct dmar_domain *domain;
5b6985ce 3035 phys_addr_t start_paddr;
f76aec76
KA
3036 struct iova *iova;
3037 int prot = 0;
6865f0d1 3038 int ret;
8c11e798 3039 struct intel_iommu *iommu;
33041ec0 3040 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3041
3042 BUG_ON(dir == DMA_NONE);
2c2e2c38 3043
5040a918 3044 if (iommu_no_mapping(dev))
6865f0d1 3045 return paddr;
f76aec76 3046
5040a918 3047 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3048 if (!domain)
3049 return 0;
3050
8c11e798 3051 iommu = domain_get_iommu(domain);
88cb6a74 3052 size = aligned_nrpages(paddr, size);
f76aec76 3053
5040a918 3054 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3055 if (!iova)
3056 goto error;
3057
ba395927
KA
3058 /*
3059 * Check if DMAR supports zero-length reads on write only
3060 * mappings..
3061 */
3062 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3063 !cap_zlr(iommu->cap))
ba395927
KA
3064 prot |= DMA_PTE_READ;
3065 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3066 prot |= DMA_PTE_WRITE;
3067 /*
6865f0d1 3068 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3069 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3070 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
3071 * is not a big problem
3072 */
0ab36de2 3073 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3074 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3075 if (ret)
3076 goto error;
3077
1f0ef2aa
DW
3078 /* it's a non-present to present mapping. Only flush if caching mode */
3079 if (cap_caching_mode(iommu->cap))
ea8ea460 3080 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3081 else
8c11e798 3082 iommu_flush_write_buffer(iommu);
f76aec76 3083
03d6a246
DW
3084 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3085 start_paddr += paddr & ~PAGE_MASK;
3086 return start_paddr;
ba395927 3087
ba395927 3088error:
f76aec76
KA
3089 if (iova)
3090 __free_iova(&domain->iovad, iova);
4cf2e75d 3091 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3092 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3093 return 0;
3094}
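/*
 * Editorial note (not part of the original source): the path above is
 * the core of the streaming DMA map operation: look up (or lazily
 * create) the device's domain, allocate an IOVA range sized in VT-d
 * pages, install the PTEs via domain_pfn_mapping(), then flush the
 * IOTLB only in caching mode (typically emulated hardware) and just
 * the write buffer otherwise.  The returned handle is the IOVA base
 * plus the original sub-page offset of paddr.
 */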
3095
ffbbef5c
FT
3096static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3097 unsigned long offset, size_t size,
3098 enum dma_data_direction dir,
3099 struct dma_attrs *attrs)
bb9e6d65 3100{
ffbbef5c 3101 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3102 dir, *dev->dma_mask);
bb9e6d65
FT
3103}
3104
5e0d2a6f 3105static void flush_unmaps(void)
3106{
80b20dd8 3107 int i, j;
5e0d2a6f 3108
5e0d2a6f 3109 timer_on = 0;
3110
3111 /* just flush them all */
3112 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3113 struct intel_iommu *iommu = g_iommus[i];
3114 if (!iommu)
3115 continue;
c42d9f32 3116
9dd2fe89
YZ
3117 if (!deferred_flush[i].next)
3118 continue;
3119
78d5f0f5
NA
3120 /* In caching mode, global flushes turn emulation expensive */
3121 if (!cap_caching_mode(iommu->cap))
3122 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3123 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3124 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3125 unsigned long mask;
3126 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3127 struct dmar_domain *domain = deferred_flush[i].domain[j];
3128
3129 /* On real hardware multiple invalidations are expensive */
3130 if (cap_caching_mode(iommu->cap))
3131 iommu_flush_iotlb_psi(iommu, domain->id,
a156ef99 3132 iova->pfn_lo, iova_size(iova),
ea8ea460 3133 !deferred_flush[i].freelist[j], 0);
78d5f0f5 3134 else {
a156ef99 3135 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
78d5f0f5
NA
3136 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3137 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3138 }
93a23a72 3139 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3140 if (deferred_flush[i].freelist[j])
3141 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3142 }
9dd2fe89 3143 deferred_flush[i].next = 0;
5e0d2a6f 3144 }
3145
5e0d2a6f 3146 list_size = 0;
5e0d2a6f 3147}
3148
3149static void flush_unmaps_timeout(unsigned long data)
3150{
80b20dd8 3151 unsigned long flags;
3152
3153 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3154 flush_unmaps();
80b20dd8 3155 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3156}
3157
ea8ea460 3158static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3159{
3160 unsigned long flags;
80b20dd8 3161 int next, iommu_id;
8c11e798 3162 struct intel_iommu *iommu;
5e0d2a6f 3163
3164 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3165 if (list_size == HIGH_WATER_MARK)
3166 flush_unmaps();
3167
8c11e798
WH
3168 iommu = domain_get_iommu(dom);
3169 iommu_id = iommu->seq_id;
c42d9f32 3170
80b20dd8 3171 next = deferred_flush[iommu_id].next;
3172 deferred_flush[iommu_id].domain[next] = dom;
3173 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3174 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3175 deferred_flush[iommu_id].next++;
5e0d2a6f 3176
3177 if (!timer_on) {
3178 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3179 timer_on = 1;
3180 }
3181 list_size++;
3182 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3183}
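/*
 * Editorial note (not part of the original source): add_unmap() is the
 * deferred half of intel_unmap() below.  Rather than invalidating the
 * IOTLB for every unmap, entries are batched per IOMMU in
 * deferred_flush[] and released either when the 10ms timer fires or
 * once HIGH_WATER_MARK entries have accumulated, trading a short
 * window of stale translations for far fewer invalidation operations
 * (see the "save the 1/6th of the cpu" comment in intel_unmap()).
 */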
3184
d41a4adb 3185static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
ba395927 3186{
f76aec76 3187 struct dmar_domain *domain;
d794dc9b 3188 unsigned long start_pfn, last_pfn;
ba395927 3189 struct iova *iova;
8c11e798 3190 struct intel_iommu *iommu;
ea8ea460 3191 struct page *freelist;
ba395927 3192
73676832 3193 if (iommu_no_mapping(dev))
f76aec76 3194 return;
2c2e2c38 3195
1525a29a 3196 domain = find_domain(dev);
ba395927
KA
3197 BUG_ON(!domain);
3198
8c11e798
WH
3199 iommu = domain_get_iommu(domain);
3200
ba395927 3201 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3202 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3203 (unsigned long long)dev_addr))
ba395927 3204 return;
ba395927 3205
d794dc9b
DW
3206 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3207 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3208
d794dc9b 3209 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3210 dev_name(dev), start_pfn, last_pfn);
ba395927 3211
ea8ea460 3212 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3213
5e0d2a6f 3214 if (intel_iommu_strict) {
03d6a246 3215 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3216 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3217 /* free iova */
3218 __free_iova(&domain->iovad, iova);
ea8ea460 3219 dma_free_pagelist(freelist);
5e0d2a6f 3220 } else {
ea8ea460 3221 add_unmap(domain, iova, freelist);
5e0d2a6f 3222 /*
3223 * queue up the release of the unmap to save the 1/6th of the
3224 * cpu used up by the iotlb flush operation...
3225 */
5e0d2a6f 3226 }
ba395927
KA
3227}
3228
d41a4adb
JL
3229static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3230 size_t size, enum dma_data_direction dir,
3231 struct dma_attrs *attrs)
3232{
3233 intel_unmap(dev, dev_addr);
3234}
3235
5040a918 3236static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3237 dma_addr_t *dma_handle, gfp_t flags,
3238 struct dma_attrs *attrs)
ba395927 3239{
36746436 3240 struct page *page = NULL;
ba395927
KA
3241 int order;
3242
5b6985ce 3243 size = PAGE_ALIGN(size);
ba395927 3244 order = get_order(size);
e8bb910d 3245
5040a918 3246 if (!iommu_no_mapping(dev))
e8bb910d 3247 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3248 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3249 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3250 flags |= GFP_DMA;
3251 else
3252 flags |= GFP_DMA32;
3253 }
ba395927 3254
36746436
AM
3255 if (flags & __GFP_WAIT) {
3256 unsigned int count = size >> PAGE_SHIFT;
3257
3258 page = dma_alloc_from_contiguous(dev, count, order);
3259 if (page && iommu_no_mapping(dev) &&
3260 page_to_phys(page) + size > dev->coherent_dma_mask) {
3261 dma_release_from_contiguous(dev, page, count);
3262 page = NULL;
3263 }
3264 }
3265
3266 if (!page)
3267 page = alloc_pages(flags, order);
3268 if (!page)
ba395927 3269 return NULL;
36746436 3270 memset(page_address(page), 0, size);
ba395927 3271
36746436 3272 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3273 DMA_BIDIRECTIONAL,
5040a918 3274 dev->coherent_dma_mask);
ba395927 3275 if (*dma_handle)
36746436
AM
3276 return page_address(page);
3277 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3278 __free_pages(page, order);
3279
ba395927
KA
3280 return NULL;
3281}
3282
5040a918 3283static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3284 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3285{
3286 int order;
36746436 3287 struct page *page = virt_to_page(vaddr);
ba395927 3288
5b6985ce 3289 size = PAGE_ALIGN(size);
ba395927
KA
3290 order = get_order(size);
3291
d41a4adb 3292 intel_unmap(dev, dma_handle);
36746436
AM
3293 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3294 __free_pages(page, order);
ba395927
KA
3295}
3296
5040a918 3297static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3298 int nelems, enum dma_data_direction dir,
3299 struct dma_attrs *attrs)
ba395927 3300{
d41a4adb 3301 intel_unmap(dev, sglist[0].dma_address);
ba395927
KA
3302}
3303
ba395927 3304static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3305 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3306{
3307 int i;
c03ab37c 3308 struct scatterlist *sg;
ba395927 3309
c03ab37c 3310 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3311 BUG_ON(!sg_page(sg));
4cf2e75d 3312 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3313 sg->dma_length = sg->length;
ba395927
KA
3314 }
3315 return nelems;
3316}
3317
5040a918 3318static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3319 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3320{
ba395927 3321 int i;
ba395927 3322 struct dmar_domain *domain;
f76aec76
KA
3323 size_t size = 0;
3324 int prot = 0;
f76aec76
KA
3325 struct iova *iova = NULL;
3326 int ret;
c03ab37c 3327 struct scatterlist *sg;
b536d24d 3328 unsigned long start_vpfn;
8c11e798 3329 struct intel_iommu *iommu;
ba395927
KA
3330
3331 BUG_ON(dir == DMA_NONE);
5040a918
DW
3332 if (iommu_no_mapping(dev))
3333 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3334
5040a918 3335 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3336 if (!domain)
3337 return 0;
3338
8c11e798
WH
3339 iommu = domain_get_iommu(domain);
3340
b536d24d 3341 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3342 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3343
5040a918
DW
3344 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3345 *dev->dma_mask);
f76aec76 3346 if (!iova) {
c03ab37c 3347 sglist->dma_length = 0;
f76aec76
KA
3348 return 0;
3349 }
3350
3351 /*
3352 * Check if DMAR supports zero-length reads on write only
3353 * mappings..
3354 */
3355 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3356 !cap_zlr(iommu->cap))
f76aec76
KA
3357 prot |= DMA_PTE_READ;
3358 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3359 prot |= DMA_PTE_WRITE;
3360
b536d24d 3361 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3362
f532959b 3363 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3364 if (unlikely(ret)) {
e1605495
DW
3365 dma_pte_free_pagetable(domain, start_vpfn,
3366 start_vpfn + size - 1);
e1605495
DW
3367 __free_iova(&domain->iovad, iova);
3368 return 0;
ba395927
KA
3369 }
3370
1f0ef2aa
DW
3371 /* it's a non-present to present mapping. Only flush if caching mode */
3372 if (cap_caching_mode(iommu->cap))
ea8ea460 3373 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3374 else
8c11e798 3375 iommu_flush_write_buffer(iommu);
1f0ef2aa 3376
ba395927
KA
3377 return nelems;
3378}
3379
dfb805e8
FT
3380static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3381{
3382 return !dma_addr;
3383}
3384
160c1d8e 3385struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3386 .alloc = intel_alloc_coherent,
3387 .free = intel_free_coherent,
ba395927
KA
3388 .map_sg = intel_map_sg,
3389 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3390 .map_page = intel_map_page,
3391 .unmap_page = intel_unmap_page,
dfb805e8 3392 .mapping_error = intel_mapping_error,
ba395927
KA
3393};
3394
3395static inline int iommu_domain_cache_init(void)
3396{
3397 int ret = 0;
3398
3399 iommu_domain_cache = kmem_cache_create("iommu_domain",
3400 sizeof(struct dmar_domain),
3401 0,
3402 SLAB_HWCACHE_ALIGN,
3403
3404 NULL);
3405 if (!iommu_domain_cache) {
3406 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3407 ret = -ENOMEM;
3408 }
3409
3410 return ret;
3411}
3412
3413static inline int iommu_devinfo_cache_init(void)
3414{
3415 int ret = 0;
3416
3417 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3418 sizeof(struct device_domain_info),
3419 0,
3420 SLAB_HWCACHE_ALIGN,
ba395927
KA
3421 NULL);
3422 if (!iommu_devinfo_cache) {
3423 printk(KERN_ERR "Couldn't create devinfo cache\n");
3424 ret = -ENOMEM;
3425 }
3426
3427 return ret;
3428}
3429
3430static inline int iommu_iova_cache_init(void)
3431{
3432 int ret = 0;
3433
3434 iommu_iova_cache = kmem_cache_create("iommu_iova",
3435 sizeof(struct iova),
3436 0,
3437 SLAB_HWCACHE_ALIGN,
ba395927
KA
3438 NULL);
3439 if (!iommu_iova_cache) {
3440 printk(KERN_ERR "Couldn't create iova cache\n");
3441 ret = -ENOMEM;
3442 }
3443
3444 return ret;
3445}
3446
3447static int __init iommu_init_mempool(void)
3448{
3449 int ret;
3450 ret = iommu_iova_cache_init();
3451 if (ret)
3452 return ret;
3453
3454 ret = iommu_domain_cache_init();
3455 if (ret)
3456 goto domain_error;
3457
3458 ret = iommu_devinfo_cache_init();
3459 if (!ret)
3460 return ret;
3461
3462 kmem_cache_destroy(iommu_domain_cache);
3463domain_error:
3464 kmem_cache_destroy(iommu_iova_cache);
3465
3466 return -ENOMEM;
3467}
3468
3469static void __init iommu_exit_mempool(void)
3470{
3471 kmem_cache_destroy(iommu_devinfo_cache);
3472 kmem_cache_destroy(iommu_domain_cache);
3473 kmem_cache_destroy(iommu_iova_cache);
3474
3475}
3476
556ab45f
DW
3477static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3478{
3479 struct dmar_drhd_unit *drhd;
3480 u32 vtbar;
3481 int rc;
3482
3483 /* We know that this device on this chipset has its own IOMMU.
3484 * If we find it under a different IOMMU, then the BIOS is lying
3485 * to us. Hope that the IOMMU for this device is actually
3486 * disabled, and it needs no translation...
3487 */
3488 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3489 if (rc) {
3490 /* "can't" happen */
3491 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3492 return;
3493 }
3494 vtbar &= 0xffff0000;
3495
3496 /* we know that this iommu should be at offset 0xa000 from vtbar */
3497 drhd = dmar_find_matched_drhd_unit(pdev);
3498 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3499 TAINT_FIRMWARE_WORKAROUND,
3500 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3501 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3502}
3503DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3504
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;
	struct device *dev;
	int i;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				break;
			/* ignore DMAR unit if no devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_active_drhd_unit(drhd) {
		if (drhd->include_all)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev)
			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
				break;
		if (i < drhd->devices_cnt)
			continue;

		/* This IOMMU has *only* gfx devices. Either bypass it or
		   set the gfx_mapped flag, as appropriate */
		if (dmar_map_gfx) {
			intel_iommu_gfx_mapped = 1;
		} else {
			drhd->ignored = 1;
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}

#ifdef CONFIG_SUSPEND
static int init_iommu_hw(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	for_each_active_iommu(iommu, drhd)
		if (iommu->qi)
			dmar_reenable_qi(iommu);

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		iommu_enable_translation(iommu);
		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
}

static void iommu_flush_all(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
	}
}

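/*
 * Across suspend, each IOMMU's fault-event control, data and address
 * registers are saved here and written back in iommu_resume(), with
 * translation disabled in between and re-enabled via init_iommu_hw().
 */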
static int iommu_suspend(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	for_each_active_iommu(iommu, drhd) {
		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
					     GFP_ATOMIC);
		if (!iommu->iommu_state)
			goto nomem;
	}

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg + DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;

nomem:
	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);

	return -ENOMEM;
}

static void iommu_resume(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	if (init_iommu_hw()) {
		if (force_on)
			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
		else
			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
		return;
	}

	for_each_active_iommu(iommu, drhd) {

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
			iommu->reg + DMAR_FECTL_REG);
		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
			iommu->reg + DMAR_FEDATA_REG);
		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
			iommu->reg + DMAR_FEADDR_REG);
		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
			iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}

	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);
}

static struct syscore_ops iommu_syscore_ops = {
	.resume = iommu_resume,
	.suspend = iommu_suspend,
};

static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif /* CONFIG_SUSPEND */

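/*
 * Parse one Reserved Memory Region Reporting (RMRR) structure from the
 * DMAR table and record its address range and device scope, so that the
 * region can later be identity-mapped for the devices that need it.
 */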
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_reserved_memory *rmrr;
	struct dmar_rmrr_unit *rmrru;

	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
	if (!rmrru)
		return -ENOMEM;

	rmrru->hdr = header;
	rmrr = (struct acpi_dmar_reserved_memory *)header;
	rmrru->base_address = rmrr->base_address;
	rmrru->end_address = rmrr->end_address;
	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
					      ((void *)rmrr) + rmrr->header.length,
					      &rmrru->devices_cnt);
	if (rmrru->devices_cnt && rmrru->devices == NULL) {
		kfree(rmrru);
		return -ENOMEM;
	}

	list_add(&rmrru->list, &dmar_rmrr_units);

	return 0;
}

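/*
 * ATS Reporting (ATSR) structures describe which PCIe root ports allow
 * Address Translation Services; the helpers below add, remove and look
 * up those units as the DMAR table or ACPI _DSM methods report them.
 */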
static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
{
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *tmp;

	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
		if (atsr->segment != tmp->segment)
			continue;
		if (atsr->header.length != tmp->header.length)
			continue;
		if (memcmp(atsr, tmp, atsr->header.length) == 0)
			return atsru;
	}

	return NULL;
}

int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
		return 0;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru)
		return 0;

	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
	if (!atsru)
		return -ENOMEM;

	/*
	 * If memory is allocated from slab by ACPI _DSM method, we need to
	 * copy the memory content because the memory buffer will be freed
	 * on return.
	 */
	atsru->hdr = (void *)(atsru + 1);
	memcpy(atsru->hdr, hdr, hdr->length);
	atsru->include_all = atsr->flags & 0x1;
	if (!atsru->include_all) {
		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
						      (void *)atsr + atsr->header.length,
						      &atsru->devices_cnt);
		if (atsru->devices_cnt && atsru->devices == NULL) {
			kfree(atsru);
			return -ENOMEM;
		}
	}

	list_add_rcu(&atsru->list, &dmar_atsr_units);

	return 0;
}

static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
{
	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
	kfree(atsru);
}

int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru) {
		list_del_rcu(&atsru->list);
		synchronize_rcu();
		intel_iommu_free_atsr(atsru);
	}

	return 0;
}

int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	int i;
	struct device *dev;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (!atsru)
		return 0;

	if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
					  i, dev)
			return -EBUSY;

	return 0;
}

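/*
 * Bring a hot-added DMAR unit into service: verify that it supports the
 * features already in use (pass-through, snooping, superpages), set up
 * its domains, root entry, queued invalidation and interrupt, and
 * finally enable translation.
 */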
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
	int sp, ret = 0;
	struct intel_iommu *iommu = dmaru->iommu;

	if (g_iommus[iommu->seq_id])
		return 0;

	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
		pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
			iommu->name);
		return -ENXIO;
	}
	if (!ecap_sc_support(iommu->ecap) &&
	    domain_update_iommu_snooping(iommu)) {
		pr_warn("IOMMU: %s doesn't support snooping.\n",
			iommu->name);
		return -ENXIO;
	}
	sp = domain_update_iommu_superpage(iommu) - 1;
	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
		pr_warn("IOMMU: %s doesn't support large page.\n",
			iommu->name);
		return -ENXIO;
	}

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	g_iommus[iommu->seq_id] = iommu;
	ret = iommu_init_domains(iommu);
	if (ret == 0)
		ret = iommu_alloc_root_entry(iommu);
	if (ret)
		goto out;

	if (dmaru->ignored) {
		/*
		 * we always have to disable PMRs or DMA may fail on this device
		 */
		if (force_on)
			iommu_disable_protect_mem_regions(iommu);
		return 0;
	}

	intel_iommu_init_qi(iommu);
	iommu_flush_write_buffer(iommu);
	ret = dmar_set_interrupt(iommu);
	if (ret)
		goto disable_iommu;

	iommu_set_root_entry(iommu);
	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
	iommu_enable_translation(iommu);

	if (si_domain) {
		ret = iommu_attach_domain(si_domain, iommu);
		if (ret < 0 || si_domain->id != ret)
			goto disable_iommu;
		domain_attach_iommu(si_domain, iommu);
	}

	iommu_disable_protect_mem_regions(iommu);
	return 0;

disable_iommu:
	disable_dmar_iommu(iommu);
out:
	free_dmar_iommu(iommu);
	return ret;
}

int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
{
	int ret = 0;
	struct intel_iommu *iommu = dmaru->iommu;

	if (!intel_iommu_enabled)
		return 0;
	if (iommu == NULL)
		return -EINVAL;

	if (insert) {
		ret = intel_iommu_add(dmaru);
	} else {
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}

	return ret;
}

static void intel_iommu_free_dmars(void)
{
	struct dmar_rmrr_unit *rmrru, *rmrr_n;
	struct dmar_atsr_unit *atsru, *atsr_n;

	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
		list_del(&rmrru->list);
		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
		kfree(rmrru);
	}

	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
		list_del(&atsru->list);
		intel_iommu_free_atsr(atsru);
	}
}

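/*
 * Walk from the device up to its PCIe root port and return non-zero if
 * an ATSR unit covers that port (directly or via include_all), i.e. if
 * ATS may be enabled for the device.
 */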
int dmar_find_matched_atsr_unit(struct pci_dev *dev)
{
	int i, ret = 1;
	struct pci_bus *bus;
	struct pci_dev *bridge = NULL;
	struct device *tmp;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	dev = pci_physfn(dev);
	for (bus = dev->bus; bus; bus = bus->parent) {
		bridge = bus->self;
		if (!bridge || !pci_is_pcie(bridge) ||
		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
			break;
	}
	if (!bridge)
		return 0;

	rcu_read_lock();
	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (atsr->segment != pci_domain_nr(dev->bus))
			continue;

		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
			if (tmp == &bridge->dev)
				goto out;

		if (atsru->include_all)
			goto out;
	}
	ret = 0;
out:
	rcu_read_unlock();

	return ret;
}

int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
{
	int ret = 0;
	struct dmar_rmrr_unit *rmrru;
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_reserved_memory *rmrr;

	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
		return 0;

	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
		rmrr = container_of(rmrru->hdr,
				    struct acpi_dmar_reserved_memory, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				rmrr->segment, rmrru->devices,
				rmrru->devices_cnt);
			if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
			dmar_remove_dev_scope(info, rmrr->segment,
				rmrru->devices, rmrru->devices_cnt);
		}
	}

	list_for_each_entry(atsru, &dmar_atsr_units, list) {
		if (atsru->include_all)
			continue;

		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
					(void *)atsr + atsr->header.length,
					atsr->segment, atsru->devices,
					atsru->devices_cnt);
			if (ret > 0)
				break;
			else if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
			if (dmar_remove_dev_scope(info, atsr->segment,
					atsru->devices, atsru->devices_cnt))
				break;
		}
	}

	return 0;
}

/*
 * Here we only respond to action of unbound device from driver.
 *
 * Added device is not attached to its DMAR domain here yet. That will happen
 * when mapping the device to iova.
 */
static int device_notifier(struct notifier_block *nb,
			   unsigned long action, void *data)
{
	struct device *dev = data;
	struct dmar_domain *domain;

	if (iommu_dummy(dev))
		return 0;

	if (action != BUS_NOTIFY_REMOVED_DEVICE)
		return 0;

	domain = find_domain(dev);
	if (!domain)
		return 0;

	down_read(&dmar_global_lock);
	domain_remove_one_dev_info(domain, dev);
	if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
		domain_exit(domain);
	up_read(&dmar_global_lock);

	return 0;
}

static struct notifier_block device_nb = {
	.notifier_call = device_notifier,
};

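/*
 * Memory hotplug support for the static identity (si) domain: memory
 * going online is added to the identity map, and memory going offline
 * has its mappings and IOVA ranges torn down and its IOTLB entries
 * flushed on every active IOMMU.
 */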
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long long start, end;
	unsigned long start_vpfn, last_vpfn;

	switch (val) {
	case MEM_GOING_ONLINE:
		start = mhp->start_pfn << PAGE_SHIFT;
		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
		if (iommu_domain_identity_map(si_domain, start, end)) {
			pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
				start, end);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
		while (start_vpfn <= last_vpfn) {
			struct iova *iova;
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			struct page *freelist;

			iova = find_iova(&si_domain->iovad, start_vpfn);
			if (iova == NULL) {
				pr_debug("dmar: failed get IOVA for PFN %lx\n",
					 start_vpfn);
				break;
			}

			iova = split_and_remove_iova(&si_domain->iovad, iova,
						     start_vpfn, last_vpfn);
			if (iova == NULL) {
				pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
					start_vpfn, last_vpfn);
				return NOTIFY_BAD;
			}

			freelist = domain_unmap(si_domain, iova->pfn_lo,
						iova->pfn_hi);

			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain->id,
						      iova->pfn_lo, iova_size(iova),
						      !freelist, 0);
			rcu_read_unlock();
			dma_free_pagelist(freelist);

			start_vpfn = iova->pfn_hi + 1;
			free_iova_mem(iova);
		}
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block intel_iommu_memory_nb = {
	.notifier_call = intel_iommu_memory_notifier,
	.priority = 0
};

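/*
 * Per-IOMMU sysfs attributes: expose the architecture version, register
 * base address and the raw capability/extended-capability registers in
 * the "intel-iommu" attribute group.
 */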
static ssize_t intel_iommu_show_version(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct intel_iommu *iommu = dev_get_drvdata(dev);
	u32 ver = readl(iommu->reg + DMAR_VER_REG);
	return sprintf(buf, "%d:%d\n",
		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);

static ssize_t intel_iommu_show_address(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct intel_iommu *iommu = dev_get_drvdata(dev);
	return sprintf(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);

static ssize_t intel_iommu_show_cap(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct intel_iommu *iommu = dev_get_drvdata(dev);
	return sprintf(buf, "%llx\n", iommu->cap);
}
static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);

static ssize_t intel_iommu_show_ecap(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct intel_iommu *iommu = dev_get_drvdata(dev);
	return sprintf(buf, "%llx\n", iommu->ecap);
}
static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);

static struct attribute *intel_iommu_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_address.attr,
	&dev_attr_cap.attr,
	&dev_attr_ecap.attr,
	NULL,
};

static struct attribute_group intel_iommu_group = {
	.name = "intel-iommu",
	.attrs = intel_iommu_attrs,
};

const struct attribute_group *intel_iommu_groups[] = {
	&intel_iommu_group,
	NULL,
};

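/*
 * Main initialisation entry point: parse the DMAR table and device
 * scopes, set up every active remapping unit via init_dmars(), install
 * intel_dma_ops as the DMA API backend, create the per-IOMMU sysfs
 * devices, and register the IOMMU-API ops, the bus notifier and (when
 * needed) the memory hotplug notifier.
 */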
int __init intel_iommu_init(void)
{
	int ret = -ENODEV;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* VT-d is required for a TXT/tboot launch, so enforce that */
	force_on = tboot_force_iommu();

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENOMEM;
	}

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		goto out_free_dmar;
	}

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	for_each_active_iommu(iommu, drhd)
		if (iommu->gcmd & DMA_GCMD_TE)
			iommu_disable_translation(iommu);

	if (dmar_dev_scope_init() < 0) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		goto out_free_dmar;
	}

	if (no_iommu || dmar_disabled)
		goto out_free_dmar;

	if (list_empty(&dmar_rmrr_units))
		printk(KERN_INFO "DMAR: No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		printk(KERN_INFO "DMAR: No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;
	}

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		goto out_free_reserved_range;
	}
	up_write(&dmar_global_lock);
	printk(KERN_INFO
	       "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
#ifdef CONFIG_SWIOTLB
	swiotlb = 0;
#endif
	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	for_each_active_iommu(iommu, drhd)
		iommu->iommu_dev = iommu_device_create(NULL, iommu,
						       intel_iommu_groups,
						       iommu->name);

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	bus_register_notifier(&pci_bus_type, &device_nb);
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);

	intel_iommu_enabled = 1;

	return 0;

out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
out_free_dmar:
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
	return ret;
}

static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
	return 0;
}

/*
 * NB - intel-iommu lacks any sort of reference counting for the users of
 * dependent devices. If multiple endpoints have intersecting dependent
 * devices, unbinding the driver from any one of them will possibly leave
 * the others unable to operate.
 */
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
					   struct device *dev)
{
	if (!iommu || !dev || !dev_is_pci(dev))
		return;

	pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
}

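/*
 * Detach one device from @domain: unlink its device_domain_info, tear
 * down its context entries (and those of its DMA aliases) and, if no
 * other device behind the same IOMMU remains in the domain, drop the
 * domain's reference on that IOMMU.
 */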
static void domain_remove_one_dev_info(struct dmar_domain *domain,
				       struct device *dev)
{
	struct device_domain_info *info, *tmp;
	struct intel_iommu *iommu;
	unsigned long flags;
	int found = 0;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &domain->devices, link) {
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			unlink_domain_info(info);
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_disable_dev_iotlb(info);
			iommu_detach_dev(iommu, info->bus, info->devfn);
			iommu_detach_dependent_devices(iommu, dev);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there are no other devices under the same iommu
		 * owned by this domain, clear this iommu in iommu_bmp
		 * and update the iommu count and coherency
		 */
		if (info->iommu == iommu)
			found = 1;
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (found == 0) {
		domain_detach_iommu(domain, iommu);
		if (!domain_type_is_vm_or_si(domain))
			iommu_detach_domain(domain, iommu);
	}
}

static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}

static int intel_iommu_domain_init(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain;

	dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
	if (!dmar_domain) {
		printk(KERN_ERR
		       "intel_iommu_domain_init: dmar_domain == NULL\n");
		return -ENOMEM;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
		       "intel_iommu_domain_init() failed\n");
		domain_exit(dmar_domain);
		return -ENOMEM;
	}
	domain_update_iommu_cap(dmar_domain);
	domain->priv = dmar_domain;

	domain->geometry.aperture_start = 0;
	domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
	domain->geometry.force_aperture = true;

	return 0;
}

static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain->priv = NULL;
	domain_exit(dmar_domain);
}

static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct intel_iommu *iommu;
	int addr_width;
	u8 bus, devfn;

	if (device_is_rmrr_locked(dev)) {
		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
		return -EPERM;
	}

	/* normally dev is not mapped */
	if (unlikely(domain_context_mapped(dev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(dev);
		if (old_domain) {
			if (domain_type_is_vm_or_si(dmar_domain))
				domain_remove_one_dev_info(old_domain, dev);
			else
				domain_remove_dev_info(old_domain);

			if (!domain_type_is_vm_or_si(old_domain) &&
			    list_empty(&old_domain->devices))
				domain_exit(old_domain);
		}
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		printk(KERN_ERR "%s: iommu width (%d) is not "
		       "sufficient for the mapped address (%llx)\n",
		       __func__, addr_width, dmar_domain->max_addr);
		return -EFAULT;
	}
	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		}
		dmar_domain->agaw--;
	}

	return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain_remove_one_dev_info(dmar_domain, dev);
}

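/*
 * IOMMU-API map/unmap callbacks: IOMMU_READ/WRITE/CACHE flags are
 * translated into VT-d PTE bits, the domain's maximum address is grown
 * (and checked against its address width) on map, and unmap returns the
 * number of bytes actually unmapped, which may exceed the request for
 * large-page mappings.
 */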
static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot)
{
	struct dmar_domain *dmar_domain = domain->priv;
	u64 max_addr;
	int prot = 0;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			printk(KERN_ERR "%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}

static size_t intel_iommu_unmap(struct iommu_domain *domain,
				unsigned long iova, size_t size)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct page *freelist = NULL;
	struct intel_iommu *iommu;
	unsigned long start_pfn, last_pfn;
	unsigned int npages;
	int iommu_id, num, ndomains, level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
	if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
		BUG();

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
		iommu = g_iommus[iommu_id];

		/*
		 * find bit position of dmar_domain
		 */
		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(num, iommu->domain_ids, ndomains) {
			if (iommu->domains[num] == dmar_domain)
				iommu_flush_iotlb_psi(iommu, num, start_pfn,
						      npages, !freelist, 0);
		}
	}

	dma_free_pagelist(freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return size;
}

static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct dma_pte *pte;
	int level = 0;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
	if (pte)
		phys = dma_pte_addr(pte);

	return phys;
}

static bool intel_iommu_capable(enum iommu_cap cap)
{
	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return domain_update_iommu_snooping(NULL) == 1;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return irq_remapping_enabled == 1;

	return false;
}

static int intel_iommu_add_device(struct device *dev)
{
	struct intel_iommu *iommu;
	struct iommu_group *group;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	iommu_device_link(iommu->iommu_dev, dev);

	group = iommu_group_get_for_dev(dev);

	if (IS_ERR(group))
		return PTR_ERR(group);

	iommu_group_put(group);
	return 0;
}

static void intel_iommu_remove_device(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	iommu_group_remove_device(dev);

	iommu_device_unlink(iommu->iommu_dev, dev);
}

static const struct iommu_ops intel_iommu_ops = {
	.capable = intel_iommu_capable,
	.domain_init = intel_iommu_domain_init,
	.domain_destroy = intel_iommu_domain_destroy,
	.attach_dev = intel_iommu_attach_device,
	.detach_dev = intel_iommu_detach_device,
	.map = intel_iommu_map,
	.unmap = intel_iommu_unmap,
	.map_sg = default_iommu_map_sg,
	.iova_to_phys = intel_iommu_iova_to_phys,
	.add_device = intel_iommu_add_device,
	.remove_device = intel_iommu_remove_device,
	.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};

static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
{
	/* G4x/GM45 integrated gfx dmar support is totally busted. */
	printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
	dmar_map_gfx = 0;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);

static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

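/*
 * Ironlake/Calpella graphics: if the BIOS left no stolen memory for a
 * shadow GTT (GGC_MEMORY_VT_ENABLED clear), graphics DMA cannot be
 * remapped, so turn off the IOMMU for gfx; otherwise force strict
 * (unbatched) IOTLB flushing so the gfx device is idle before a flush.
 */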
static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
		intel_iommu_strict = 1;
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
	       vtisochctrl);
}