iommu/vt-d: correct flush_unmaps pfn usage
[deliverable/linux.git] drivers/iommu/intel-iommu.c
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
aa473240 36#include <linux/cpu.h>
5e0d2a6f 37#include <linux/timer.h>
dfddb969 38#include <linux/io.h>
38717946 39#include <linux/iova.h>
5d450806 40#include <linux/iommu.h>
38717946 41#include <linux/intel-iommu.h>
134fac3f 42#include <linux/syscore_ops.h>
69575d38 43#include <linux/tboot.h>
adb2fe02 44#include <linux/dmi.h>
5cdede24 45#include <linux/pci-ats.h>
0ee332c1 46#include <linux/memblock.h>
36746436 47#include <linux/dma-contiguous.h>
091d42e4 48#include <linux/crash_dump.h>
8a8f422d 49#include <asm/irq_remapping.h>
ba395927 50#include <asm/cacheflush.h>
46a7fa27 51#include <asm/iommu.h>
ba395927 52
078e1ee2
JR
53#include "irq_remapping.h"
54
5b6985ce
FY
55#define ROOT_SIZE VTD_PAGE_SIZE
56#define CONTEXT_SIZE VTD_PAGE_SIZE
57
ba395927 58#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 59#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 60#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 61#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
62
63#define IOAPIC_RANGE_START (0xfee00000)
64#define IOAPIC_RANGE_END (0xfeefffff)
65#define IOVA_START_ADDR (0x1000)
66
67#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
68
4ed0d3e6 69#define MAX_AGAW_WIDTH 64
5c645b35 70#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 71
2ebe3151
DW
72#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
73#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
74
75/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
76 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
77#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
78 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
79#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
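/*
 * Editor's note (illustrative, not part of the original file): with the
 * default 48-bit guest address width, __DOMAIN_MAX_PFN(48) is
 * (1ULL << 36) - 1, which fits comfortably in a 64-bit unsigned long.
 * On a 32-bit kernel the min_t() above clamps DOMAIN_MAX_PFN to
 * ULONG_MAX (0xffffffff), so PFN arithmetic never overflows the type.
 */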
ba395927 80
1b722500
RM
81/* IO virtual address start page frame number */
82#define IOVA_START_PFN (1)
83
f27be03b 84#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 85#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 86#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 87
df08cdc7
AM
88/* page table handling */
89#define LEVEL_STRIDE (9)
90#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
91
6d1c56a9
OBC
92/*
 93 * This bitmap is used to advertise the page sizes our hardware supports
94 * to the IOMMU core, which will then use this information to split
95 * physically contiguous memory regions it is mapping into page sizes
96 * that we support.
97 *
98 * Traditionally the IOMMU core just handed us the mappings directly,
 99 * after making sure the size is a power-of-two multiple of 4KiB and that the
100 * mapping has natural alignment.
101 *
102 * To retain this behavior, we currently advertise that we support
 103 * all page sizes that are a power-of-two multiple of 4KiB.
104 *
105 * If at some point we'd like to utilize the IOMMU core's new behavior,
106 * we could change this to advertise the real page sizes we support.
107 */
108#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
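/*
 * Editor's sketch (not in the original source): ~0xFFFUL clears bits 0-11
 * and sets every bit from 12 upward, so the bitmap advertises 4KiB, 8KiB,
 * 16KiB, ... i.e. every power-of-two size that is a multiple of 4KiB:
 *
 *	BUILD_BUG_ON(!(INTEL_IOMMU_PGSIZES & SZ_4K));	// bit 12
 *	BUILD_BUG_ON(!(INTEL_IOMMU_PGSIZES & SZ_2M));	// bit 21
 *	BUILD_BUG_ON(!(INTEL_IOMMU_PGSIZES & SZ_1G));	// bit 30
 */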
109
df08cdc7
AM
110static inline int agaw_to_level(int agaw)
111{
112 return agaw + 2;
113}
114
115static inline int agaw_to_width(int agaw)
116{
5c645b35 117 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
118}
119
120static inline int width_to_agaw(int width)
121{
5c645b35 122 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
123}
124
125static inline unsigned int level_to_offset_bits(int level)
126{
127 return (level - 1) * LEVEL_STRIDE;
128}
129
130static inline int pfn_level_offset(unsigned long pfn, int level)
131{
132 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133}
134
135static inline unsigned long level_mask(int level)
136{
137 return -1UL << level_to_offset_bits(level);
138}
139
140static inline unsigned long level_size(int level)
141{
142 return 1UL << level_to_offset_bits(level);
143}
144
145static inline unsigned long align_to_level(unsigned long pfn, int level)
146{
147 return (pfn + level_size(level) - 1) & level_mask(level);
148}
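/*
 * Editor's worked example (not part of the original file), assuming the
 * 9-bit stride above: at level 2, level_to_offset_bits(2) == 9, so
 * level_size(2) == 512 VT-d pages (2MiB) and level_mask(2) == ~511UL.
 * For pfn 0x12345:
 *	pfn_level_offset(0x12345, 2) == (0x12345 >> 9) & 0x1ff == 0x91
 *	align_to_level(0x12345, 2)   == 0x12400
 */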
fd18de50 149
6dd9a7c7
YS
150static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151{
5c645b35 152 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
153}
154
dd4e8319
DW
155/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
156 are never going to work. */
157static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158{
159 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160}
161
162static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163{
164 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165}
166static inline unsigned long page_to_dma_pfn(struct page *pg)
167{
168 return mm_to_dma_pfn(page_to_pfn(pg));
169}
170static inline unsigned long virt_to_dma_pfn(void *p)
171{
172 return page_to_dma_pfn(virt_to_page(p));
173}
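/*
 * Editor's note (illustrative only): on x86 PAGE_SHIFT and VTD_PAGE_SHIFT
 * are both 12, so these conversions are identity operations today; the
 * shifts only matter if the MM page ever becomes larger than the 4KiB
 * VT-d page, e.g. an 8KiB MM page at mm_pfn 0x100 would span dma_pfns
 * 0x200-0x201.
 */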
174
d9630fe9
WH
175/* global iommu list, set NULL for ignored DMAR units */
176static struct intel_iommu **g_iommus;
177
e0fc7e0b 178static void __init check_tylersburg_isoch(void);
9af88143
DW
179static int rwbf_quirk;
180
b779260b
JC
181/*
182 * set to 1 to panic kernel if can't successfully enable VT-d
183 * (used when kernel is launched w/ TXT)
184 */
185static int force_on = 0;
186
46b08e1a
MM
187/*
188 * 0: Present
189 * 1-11: Reserved
190 * 12-63: Context Ptr (12 - (haw-1))
191 * 64-127: Reserved
192 */
193struct root_entry {
03ecc32c
DW
194 u64 lo;
195 u64 hi;
46b08e1a
MM
196};
197#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 198
091d42e4
JR
199/*
200 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
201 * if marked present.
202 */
203static phys_addr_t root_entry_lctp(struct root_entry *re)
204{
205 if (!(re->lo & 1))
206 return 0;
207
208 return re->lo & VTD_PAGE_MASK;
209}
210
211/*
212 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
213 * if marked present.
214 */
215static phys_addr_t root_entry_uctp(struct root_entry *re)
216{
217 if (!(re->hi & 1))
218 return 0;
46b08e1a 219
091d42e4
JR
220 return re->hi & VTD_PAGE_MASK;
221}
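/*
 * Editor's example (not from the original file): a root entry whose low
 * word is 0x0000000012345001 is present (bit 0) and points its lower
 * context table at physical address 0x12345000, i.e.
 * root_entry_lctp(re) == 0x12345000; with the present bit clear the
 * helper returns 0.
 */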
7a8fc25e
MM
222/*
223 * low 64 bits:
224 * 0: present
225 * 1: fault processing disable
226 * 2-3: translation type
227 * 12-63: address space root
228 * high 64 bits:
229 * 0-2: address width
230 * 3-6: aval
231 * 8-23: domain id
232 */
233struct context_entry {
234 u64 lo;
235 u64 hi;
236};
c07e7d21 237
cf484d0e
JR
238static inline void context_clear_pasid_enable(struct context_entry *context)
239{
240 context->lo &= ~(1ULL << 11);
241}
242
243static inline bool context_pasid_enabled(struct context_entry *context)
244{
245 return !!(context->lo & (1ULL << 11));
246}
247
248static inline void context_set_copied(struct context_entry *context)
249{
250 context->hi |= (1ull << 3);
251}
252
253static inline bool context_copied(struct context_entry *context)
254{
255 return !!(context->hi & (1ULL << 3));
256}
257
258static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
259{
260 return (context->lo & 1);
261}
cf484d0e
JR
262
263static inline bool context_present(struct context_entry *context)
264{
265 return context_pasid_enabled(context) ?
266 __context_present(context) :
267 __context_present(context) && !context_copied(context);
268}
269
c07e7d21
MM
270static inline void context_set_present(struct context_entry *context)
271{
272 context->lo |= 1;
273}
274
275static inline void context_set_fault_enable(struct context_entry *context)
276{
277 context->lo &= (((u64)-1) << 2) | 1;
278}
279
c07e7d21
MM
280static inline void context_set_translation_type(struct context_entry *context,
281 unsigned long value)
282{
283 context->lo &= (((u64)-1) << 4) | 3;
284 context->lo |= (value & 3) << 2;
285}
286
287static inline void context_set_address_root(struct context_entry *context,
288 unsigned long value)
289{
1a2262f9 290 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
291 context->lo |= value & VTD_PAGE_MASK;
292}
293
294static inline void context_set_address_width(struct context_entry *context,
295 unsigned long value)
296{
297 context->hi |= value & 7;
298}
299
300static inline void context_set_domain_id(struct context_entry *context,
301 unsigned long value)
302{
303 context->hi |= (value & ((1 << 16) - 1)) << 8;
304}
305
dbcd861f
JR
306static inline int context_domain_id(struct context_entry *c)
307{
308 return((c->hi >> 8) & 0xffff);
309}
310
c07e7d21
MM
311static inline void context_clear_entry(struct context_entry *context)
312{
313 context->lo = 0;
314 context->hi = 0;
315}
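/*
 * Editor's sketch of how the setters above compose (illustrative only,
 * not the file's actual context-mapping path):
 *
 *	context_clear_entry(ctx);
 *	context_set_domain_id(ctx, 42);
 *	context_set_address_width(ctx, agaw);
 *	context_set_address_root(ctx, virt_to_phys(domain->pgd));
 *	context_set_translation_type(ctx, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(ctx);
 *	context_set_present(ctx);
 */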
7a8fc25e 316
622ba12a
MM
317/*
318 * 0: readable
319 * 1: writable
320 * 2-6: reserved
321 * 7: super page
9cf06697
SY
322 * 8-10: available
323 * 11: snoop behavior
622ba12a
MM
 324 * 12-63: Host physical address
325 */
326struct dma_pte {
327 u64 val;
328};
622ba12a 329
19c239ce
MM
330static inline void dma_clear_pte(struct dma_pte *pte)
331{
332 pte->val = 0;
333}
334
19c239ce
MM
335static inline u64 dma_pte_addr(struct dma_pte *pte)
336{
c85994e4
DW
337#ifdef CONFIG_64BIT
338 return pte->val & VTD_PAGE_MASK;
339#else
340 /* Must have a full atomic 64-bit read */
1a8bd481 341 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 342#endif
19c239ce
MM
343}
344
19c239ce
MM
345static inline bool dma_pte_present(struct dma_pte *pte)
346{
347 return (pte->val & 3) != 0;
348}
622ba12a 349
4399c8bf
AK
350static inline bool dma_pte_superpage(struct dma_pte *pte)
351{
c3c75eb7 352 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
353}
354
75e6bf96
DW
355static inline int first_pte_in_page(struct dma_pte *pte)
356{
357 return !((unsigned long)pte & ~VTD_PAGE_MASK);
358}
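/*
 * Editor's example (not part of the original source): a PTE whose val is
 * 0x0000000012345003 is present (read and write bits set), is not a
 * superpage (DMA_PTE_LARGE_PAGE clear), and dma_pte_addr() returns
 * 0x12345000 after masking off the low 12 flag bits.
 */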
359
2c2e2c38
FY
360/*
 361 * This domain is a static identity mapping domain.
 362 * 1. This domain creates a static 1:1 mapping to all usable memory.
 363 * 2. It maps to each iommu if successful.
 364 * 3. Each iommu maps to this domain if successful.
365 */
19943b0e
DW
366static struct dmar_domain *si_domain;
367static int hw_pass_through = 1;
2c2e2c38 368
28ccce0d
JR
369/*
 370 * Domain represents a virtual machine; more than one device
1ce28feb
WH
 371 * across iommus may be owned by one domain, e.g. a kvm guest.
372 */
ab8dfe25 373#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 374
2c2e2c38 375/* si_domain contains multiple devices */
ab8dfe25 376#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 377
29a27719
JR
378#define for_each_domain_iommu(idx, domain) \
379 for (idx = 0; idx < g_num_of_iommus; idx++) \
380 if (domain->iommu_refcnt[idx])
381
99126f7c 382struct dmar_domain {
4c923d47 383 int nid; /* node id */
29a27719
JR
384
385 unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
386 /* Refcount of devices per iommu */
387
99126f7c 388
c0e8a6c8
JR
389 u16 iommu_did[DMAR_UNITS_SUPPORTED];
390 /* Domain ids per IOMMU. Use u16 since
391 * domain ids are 16 bit wide according
392 * to VT-d spec, section 9.3 */
99126f7c 393
00a77deb 394 struct list_head devices; /* all devices' list */
99126f7c
MM
395 struct iova_domain iovad; /* iova's that belong to this domain */
396
397 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
398 int gaw; /* max guest address width */
399
400 /* adjusted guest address width, 0 is level 2 30-bit */
401 int agaw;
402
3b5410e7 403 int flags; /* flags to find out type of domain */
8e604097
WH
404
405 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 406 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 407 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
408 int iommu_superpage;/* Level of superpages supported:
409 0 == 4KiB (no superpages), 1 == 2MiB,
410 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
fe40f1e0 411 u64 max_addr; /* maximum mapped address */
00a77deb
JR
412
413 struct iommu_domain domain; /* generic domain data structure for
414 iommu core */
99126f7c
MM
415};
416
a647dacb
MM
417/* PCI domain-device relationship */
418struct device_domain_info {
419 struct list_head link; /* link to domain siblings */
420 struct list_head global; /* link to global list */
276dbf99 421 u8 bus; /* PCI bus number */
a647dacb 422 u8 devfn; /* PCI devfn number */
b16d0cb9
DW
423 u8 pasid_supported:3;
424 u8 pasid_enabled:1;
425 u8 pri_supported:1;
426 u8 pri_enabled:1;
427 u8 ats_supported:1;
428 u8 ats_enabled:1;
429 u8 ats_qdep;
0bcb3e28 430 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 431 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
432 struct dmar_domain *domain; /* pointer to domain */
433};
434
b94e4117
JL
435struct dmar_rmrr_unit {
436 struct list_head list; /* list of rmrr units */
437 struct acpi_dmar_header *hdr; /* ACPI header */
438 u64 base_address; /* reserved base address*/
439 u64 end_address; /* reserved end address */
832bd858 440 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
441 int devices_cnt; /* target device count */
442};
443
444struct dmar_atsr_unit {
445 struct list_head list; /* list of ATSR units */
446 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 447 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
448 int devices_cnt; /* target device count */
449 u8 include_all:1; /* include all ports */
450};
451
452static LIST_HEAD(dmar_atsr_units);
453static LIST_HEAD(dmar_rmrr_units);
454
455#define for_each_rmrr_units(rmrr) \
456 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
457
5e0d2a6f 458static void flush_unmaps_timeout(unsigned long data);
459
314f1dc1
OP
460struct deferred_flush_entry {
461 struct iova *iova;
462 struct dmar_domain *domain;
463 struct page *freelist;
464};
465
80b20dd8 466#define HIGH_WATER_MARK 250
314f1dc1 467struct deferred_flush_table {
80b20dd8 468 int next;
314f1dc1 469 struct deferred_flush_entry entries[HIGH_WATER_MARK];
80b20dd8 470};
471
aa473240
OP
472struct deferred_flush_data {
473 spinlock_t lock;
474 int timer_on;
475 struct timer_list timer;
476 long size;
477 struct deferred_flush_table *tables;
478};
479
480DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
80b20dd8 481
5e0d2a6f 482/* number of intel_iommus, used to index/size g_iommus */
5e0d2a6f 483static int g_num_of_iommus;
484
92d03cc8 485static void domain_exit(struct dmar_domain *domain);
ba395927 486static void domain_remove_dev_info(struct dmar_domain *domain);
e6de0f8d
JR
487static void dmar_remove_one_dev_info(struct dmar_domain *domain,
488 struct device *dev);
127c7615 489static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2452d9db
JR
490static void domain_context_clear(struct intel_iommu *iommu,
491 struct device *dev);
2a46ddf7
JL
492static int domain_detach_iommu(struct dmar_domain *domain,
493 struct intel_iommu *iommu);
ba395927 494
d3f13810 495#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
496int dmar_disabled = 0;
497#else
498int dmar_disabled = 1;
d3f13810 499#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 500
8bc1f85c
ED
501int intel_iommu_enabled = 0;
502EXPORT_SYMBOL_GPL(intel_iommu_enabled);
503
2d9e667e 504static int dmar_map_gfx = 1;
7d3b03ce 505static int dmar_forcedac;
5e0d2a6f 506static int intel_iommu_strict;
6dd9a7c7 507static int intel_iommu_superpage = 1;
c83b2f20 508static int intel_iommu_ecs = 1;
ae853ddb
DW
509static int intel_iommu_pasid28;
510static int iommu_identity_mapping;
c83b2f20 511
ae853ddb
DW
512#define IDENTMAP_ALL 1
513#define IDENTMAP_GFX 2
514#define IDENTMAP_AZALIA 4
c83b2f20 515
d42fde70
DW
516/* Broadwell and Skylake have broken ECS support — normal so-called "second
517 * level" translation of DMA requests-without-PASID doesn't actually happen
518 * unless you also set the NESTE bit in an extended context-entry. Which of
519 * course means that SVM doesn't work because it's trying to do nested
520 * translation of the physical addresses it finds in the process page tables,
521 * through the IOVA->phys mapping found in the "second level" page tables.
522 *
523 * The VT-d specification was retroactively changed to change the definition
524 * of the capability bits and pretend that Broadwell/Skylake never happened...
525 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
526 * for some reason it was the PASID capability bit which was redefined (from
527 * bit 28 on BDW/SKL to bit 40 in future).
528 *
529 * So our test for ECS needs to eschew those implementations which set the old
 530 * PASID capability bit 28, since those are the ones on which ECS is broken.
531 * Unless we are working around the 'pasid28' limitations, that is, by putting
532 * the device into passthrough mode for normal DMA and thus masking the bug.
533 */
c83b2f20 534#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
d42fde70
DW
535 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
536/* PASID support is thus enabled if ECS is enabled and *either* of the old
537 * or new capability bits are set. */
538#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
539 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
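/*
 * Editor's summary (illustrative, mirrors the comment above): the two
 * macros roughly decay to
 *
 *	ecs   = intel_iommu_ecs && ecap_ecs &&
 *		(intel_iommu_pasid28 || !ecap_broken_pasid)
 *	pasid = ecs && (ecap_pasid || ecap_broken_pasid)
 *
 * i.e. BDW/SKL parts that advertise the old bit 28 get ECS (and hence
 * PASID/SVM) only when the "pasid28" workaround is requested on the
 * command line.
 */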
ba395927 540
c0771df8
DW
541int intel_iommu_gfx_mapped;
542EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
543
ba395927
KA
544#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
545static DEFINE_SPINLOCK(device_domain_lock);
546static LIST_HEAD(device_domain_list);
547
b22f6434 548static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 549
4158c2ec
JR
550static bool translation_pre_enabled(struct intel_iommu *iommu)
551{
552 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
553}
554
091d42e4
JR
555static void clear_translation_pre_enabled(struct intel_iommu *iommu)
556{
557 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
558}
559
4158c2ec
JR
560static void init_translation_status(struct intel_iommu *iommu)
561{
562 u32 gsts;
563
564 gsts = readl(iommu->reg + DMAR_GSTS_REG);
565 if (gsts & DMA_GSTS_TES)
566 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
567}
568
00a77deb
JR
 569/* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
570static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
571{
572 return container_of(dom, struct dmar_domain, domain);
573}
574
ba395927
KA
575static int __init intel_iommu_setup(char *str)
576{
577 if (!str)
578 return -EINVAL;
579 while (*str) {
0cd5c3c8
KM
580 if (!strncmp(str, "on", 2)) {
581 dmar_disabled = 0;
9f10e5bf 582 pr_info("IOMMU enabled\n");
0cd5c3c8 583 } else if (!strncmp(str, "off", 3)) {
ba395927 584 dmar_disabled = 1;
9f10e5bf 585 pr_info("IOMMU disabled\n");
ba395927
KA
586 } else if (!strncmp(str, "igfx_off", 8)) {
587 dmar_map_gfx = 0;
9f10e5bf 588 pr_info("Disable GFX device mapping\n");
7d3b03ce 589 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 590 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 591 dmar_forcedac = 1;
5e0d2a6f 592 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 593 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 594 intel_iommu_strict = 1;
6dd9a7c7 595 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 596 pr_info("Disable supported super page\n");
6dd9a7c7 597 intel_iommu_superpage = 0;
c83b2f20
DW
598 } else if (!strncmp(str, "ecs_off", 7)) {
599 printk(KERN_INFO
600 "Intel-IOMMU: disable extended context table support\n");
601 intel_iommu_ecs = 0;
ae853ddb
DW
602 } else if (!strncmp(str, "pasid28", 7)) {
603 printk(KERN_INFO
604 "Intel-IOMMU: enable pre-production PASID support\n");
605 intel_iommu_pasid28 = 1;
606 iommu_identity_mapping |= IDENTMAP_GFX;
ba395927
KA
607 }
608
609 str += strcspn(str, ",");
610 while (*str == ',')
611 str++;
612 }
613 return 0;
614}
615__setup("intel_iommu=", intel_iommu_setup);
616
617static struct kmem_cache *iommu_domain_cache;
618static struct kmem_cache *iommu_devinfo_cache;
ba395927 619
9452d5bf
JR
620static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
621{
8bf47816
JR
622 struct dmar_domain **domains;
623 int idx = did >> 8;
624
625 domains = iommu->domains[idx];
626 if (!domains)
627 return NULL;
628
629 return domains[did & 0xff];
9452d5bf
JR
630}
631
632static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
633 struct dmar_domain *domain)
634{
8bf47816
JR
635 struct dmar_domain **domains;
636 int idx = did >> 8;
637
638 if (!iommu->domains[idx]) {
639 size_t size = 256 * sizeof(struct dmar_domain *);
640 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
641 }
642
643 domains = iommu->domains[idx];
644 if (WARN_ON(!domains))
645 return;
646 else
647 domains[did & 0xff] = domain;
9452d5bf
JR
648}
649
4c923d47 650static inline void *alloc_pgtable_page(int node)
eb3fa7cb 651{
4c923d47
SS
652 struct page *page;
653 void *vaddr = NULL;
eb3fa7cb 654
4c923d47
SS
655 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
656 if (page)
657 vaddr = page_address(page);
eb3fa7cb 658 return vaddr;
ba395927
KA
659}
660
661static inline void free_pgtable_page(void *vaddr)
662{
663 free_page((unsigned long)vaddr);
664}
665
666static inline void *alloc_domain_mem(void)
667{
354bb65e 668 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
669}
670
38717946 671static void free_domain_mem(void *vaddr)
ba395927
KA
672{
673 kmem_cache_free(iommu_domain_cache, vaddr);
674}
675
676static inline void * alloc_devinfo_mem(void)
677{
354bb65e 678 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
679}
680
681static inline void free_devinfo_mem(void *vaddr)
682{
683 kmem_cache_free(iommu_devinfo_cache, vaddr);
684}
685
ab8dfe25
JL
686static inline int domain_type_is_vm(struct dmar_domain *domain)
687{
688 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
689}
690
28ccce0d
JR
691static inline int domain_type_is_si(struct dmar_domain *domain)
692{
693 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
694}
695
ab8dfe25
JL
696static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
697{
698 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
699 DOMAIN_FLAG_STATIC_IDENTITY);
700}
1b573683 701
162d1b10
JL
702static inline int domain_pfn_supported(struct dmar_domain *domain,
703 unsigned long pfn)
704{
705 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
706
707 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
708}
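/*
 * Editor's worked example (not in the original file): for the default
 * 48-bit guest address width, agaw_to_width() gives 48, so addr_width is
 * 48 - 12 = 36 and any pfn with bits above bit 35 set is rejected; a
 * domain limited to 39 bits would likewise only accept 27-bit pfns.
 */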
709
4ed0d3e6 710static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
711{
712 unsigned long sagaw;
713 int agaw = -1;
714
715 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 716 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
717 agaw >= 0; agaw--) {
718 if (test_bit(agaw, &sagaw))
719 break;
720 }
721
722 return agaw;
723}
724
4ed0d3e6
FY
725/*
726 * Calculate max SAGAW for each iommu.
727 */
728int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
729{
730 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
731}
732
733/*
734 * calculate agaw for each iommu.
 735 * "SAGAW" may differ across iommus; use a default agaw, and fall back
 736 * to a smaller supported agaw for iommus that don't support the default.
737 */
738int iommu_calculate_agaw(struct intel_iommu *iommu)
739{
740 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
741}
742
2c2e2c38 743/* This function only returns a single iommu in a domain */
8c11e798
WH
744static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
745{
746 int iommu_id;
747
2c2e2c38 748 /* si_domain and vm domain should not get here. */
ab8dfe25 749 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
750 for_each_domain_iommu(iommu_id, domain)
751 break;
752
8c11e798
WH
753 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
754 return NULL;
755
756 return g_iommus[iommu_id];
757}
758
8e604097
WH
759static void domain_update_iommu_coherency(struct dmar_domain *domain)
760{
d0501960
DW
761 struct dmar_drhd_unit *drhd;
762 struct intel_iommu *iommu;
2f119c78
QL
763 bool found = false;
764 int i;
2e12bc29 765
d0501960 766 domain->iommu_coherency = 1;
8e604097 767
29a27719 768 for_each_domain_iommu(i, domain) {
2f119c78 769 found = true;
8e604097
WH
770 if (!ecap_coherent(g_iommus[i]->ecap)) {
771 domain->iommu_coherency = 0;
772 break;
773 }
8e604097 774 }
d0501960
DW
775 if (found)
776 return;
777
778 /* No hardware attached; use lowest common denominator */
779 rcu_read_lock();
780 for_each_active_iommu(iommu, drhd) {
781 if (!ecap_coherent(iommu->ecap)) {
782 domain->iommu_coherency = 0;
783 break;
784 }
785 }
786 rcu_read_unlock();
8e604097
WH
787}
788
161f6934 789static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 790{
161f6934
JL
791 struct dmar_drhd_unit *drhd;
792 struct intel_iommu *iommu;
793 int ret = 1;
58c610bd 794
161f6934
JL
795 rcu_read_lock();
796 for_each_active_iommu(iommu, drhd) {
797 if (iommu != skip) {
798 if (!ecap_sc_support(iommu->ecap)) {
799 ret = 0;
800 break;
801 }
58c610bd 802 }
58c610bd 803 }
161f6934
JL
804 rcu_read_unlock();
805
806 return ret;
58c610bd
SY
807}
808
161f6934 809static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 810{
8140a95d 811 struct dmar_drhd_unit *drhd;
161f6934 812 struct intel_iommu *iommu;
8140a95d 813 int mask = 0xf;
6dd9a7c7
YS
814
815 if (!intel_iommu_superpage) {
161f6934 816 return 0;
6dd9a7c7
YS
817 }
818
8140a95d 819 /* set iommu_superpage to the smallest common denominator */
0e242612 820 rcu_read_lock();
8140a95d 821 for_each_active_iommu(iommu, drhd) {
161f6934
JL
822 if (iommu != skip) {
823 mask &= cap_super_page_val(iommu->cap);
824 if (!mask)
825 break;
6dd9a7c7
YS
826 }
827 }
0e242612
JL
828 rcu_read_unlock();
829
161f6934 830 return fls(mask);
6dd9a7c7
YS
831}
832
58c610bd
SY
833/* Some capabilities may be different across iommus */
834static void domain_update_iommu_cap(struct dmar_domain *domain)
835{
836 domain_update_iommu_coherency(domain);
161f6934
JL
837 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
838 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
839}
840
03ecc32c
DW
841static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
842 u8 bus, u8 devfn, int alloc)
843{
844 struct root_entry *root = &iommu->root_entry[bus];
845 struct context_entry *context;
846 u64 *entry;
847
4df4eab1 848 entry = &root->lo;
c83b2f20 849 if (ecs_enabled(iommu)) {
03ecc32c
DW
850 if (devfn >= 0x80) {
851 devfn -= 0x80;
852 entry = &root->hi;
853 }
854 devfn *= 2;
855 }
03ecc32c
DW
856 if (*entry & 1)
857 context = phys_to_virt(*entry & VTD_PAGE_MASK);
858 else {
859 unsigned long phy_addr;
860 if (!alloc)
861 return NULL;
862
863 context = alloc_pgtable_page(iommu->node);
864 if (!context)
865 return NULL;
866
867 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
868 phy_addr = virt_to_phys((void *)context);
869 *entry = phy_addr | 1;
870 __iommu_flush_cache(iommu, entry, sizeof(*entry));
871 }
872 return &context[devfn];
873}
874
4ed6a540
DW
875static int iommu_dummy(struct device *dev)
876{
877 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
878}
879
156baca8 880static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
881{
882 struct dmar_drhd_unit *drhd = NULL;
b683b230 883 struct intel_iommu *iommu;
156baca8
DW
884 struct device *tmp;
885 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 886 u16 segment = 0;
c7151a8d
WH
887 int i;
888
4ed6a540
DW
889 if (iommu_dummy(dev))
890 return NULL;
891
156baca8
DW
892 if (dev_is_pci(dev)) {
893 pdev = to_pci_dev(dev);
894 segment = pci_domain_nr(pdev->bus);
ca5b74d2 895 } else if (has_acpi_companion(dev))
156baca8
DW
896 dev = &ACPI_COMPANION(dev)->dev;
897
0e242612 898 rcu_read_lock();
b683b230 899 for_each_active_iommu(iommu, drhd) {
156baca8 900 if (pdev && segment != drhd->segment)
276dbf99 901 continue;
c7151a8d 902
b683b230 903 for_each_active_dev_scope(drhd->devices,
156baca8
DW
904 drhd->devices_cnt, i, tmp) {
905 if (tmp == dev) {
906 *bus = drhd->devices[i].bus;
907 *devfn = drhd->devices[i].devfn;
b683b230 908 goto out;
156baca8
DW
909 }
910
911 if (!pdev || !dev_is_pci(tmp))
912 continue;
913
914 ptmp = to_pci_dev(tmp);
915 if (ptmp->subordinate &&
916 ptmp->subordinate->number <= pdev->bus->number &&
917 ptmp->subordinate->busn_res.end >= pdev->bus->number)
918 goto got_pdev;
924b6231 919 }
c7151a8d 920
156baca8
DW
921 if (pdev && drhd->include_all) {
922 got_pdev:
923 *bus = pdev->bus->number;
924 *devfn = pdev->devfn;
b683b230 925 goto out;
156baca8 926 }
c7151a8d 927 }
b683b230 928 iommu = NULL;
156baca8 929 out:
0e242612 930 rcu_read_unlock();
c7151a8d 931
b683b230 932 return iommu;
c7151a8d
WH
933}
934
5331fe6f
WH
935static void domain_flush_cache(struct dmar_domain *domain,
936 void *addr, int size)
937{
938 if (!domain->iommu_coherency)
939 clflush_cache_range(addr, size);
940}
941
ba395927
KA
942static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
943{
ba395927 944 struct context_entry *context;
03ecc32c 945 int ret = 0;
ba395927
KA
946 unsigned long flags;
947
948 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
949 context = iommu_context_addr(iommu, bus, devfn, 0);
950 if (context)
951 ret = context_present(context);
ba395927
KA
952 spin_unlock_irqrestore(&iommu->lock, flags);
953 return ret;
954}
955
956static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
957{
ba395927
KA
958 struct context_entry *context;
959 unsigned long flags;
960
961 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c 962 context = iommu_context_addr(iommu, bus, devfn, 0);
ba395927 963 if (context) {
03ecc32c
DW
964 context_clear_entry(context);
965 __iommu_flush_cache(iommu, context, sizeof(*context));
ba395927
KA
966 }
967 spin_unlock_irqrestore(&iommu->lock, flags);
968}
969
970static void free_context_table(struct intel_iommu *iommu)
971{
ba395927
KA
972 int i;
973 unsigned long flags;
974 struct context_entry *context;
975
976 spin_lock_irqsave(&iommu->lock, flags);
977 if (!iommu->root_entry) {
978 goto out;
979 }
980 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 981 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
982 if (context)
983 free_pgtable_page(context);
03ecc32c 984
c83b2f20 985 if (!ecs_enabled(iommu))
03ecc32c
DW
986 continue;
987
988 context = iommu_context_addr(iommu, i, 0x80, 0);
989 if (context)
990 free_pgtable_page(context);
991
ba395927
KA
992 }
993 free_pgtable_page(iommu->root_entry);
994 iommu->root_entry = NULL;
995out:
996 spin_unlock_irqrestore(&iommu->lock, flags);
997}
998
b026fd28 999static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 1000 unsigned long pfn, int *target_level)
ba395927 1001{
ba395927
KA
1002 struct dma_pte *parent, *pte = NULL;
1003 int level = agaw_to_level(domain->agaw);
4399c8bf 1004 int offset;
ba395927
KA
1005
1006 BUG_ON(!domain->pgd);
f9423606 1007
162d1b10 1008 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
1009 /* Address beyond IOMMU's addressing capabilities. */
1010 return NULL;
1011
ba395927
KA
1012 parent = domain->pgd;
1013
5cf0a76f 1014 while (1) {
ba395927
KA
1015 void *tmp_page;
1016
b026fd28 1017 offset = pfn_level_offset(pfn, level);
ba395927 1018 pte = &parent[offset];
5cf0a76f 1019 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 1020 break;
5cf0a76f 1021 if (level == *target_level)
ba395927
KA
1022 break;
1023
19c239ce 1024 if (!dma_pte_present(pte)) {
c85994e4
DW
1025 uint64_t pteval;
1026
4c923d47 1027 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 1028
206a73c1 1029 if (!tmp_page)
ba395927 1030 return NULL;
206a73c1 1031
c85994e4 1032 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 1033 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 1034 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
1035 /* Someone else set it while we were thinking; use theirs. */
1036 free_pgtable_page(tmp_page);
effad4b5 1037 else
c85994e4 1038 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 1039 }
5cf0a76f
DW
1040 if (level == 1)
1041 break;
1042
19c239ce 1043 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1044 level--;
1045 }
1046
5cf0a76f
DW
1047 if (!*target_level)
1048 *target_level = level;
1049
ba395927
KA
1050 return pte;
1051}
1052
6dd9a7c7 1053
ba395927 1054/* return address's pte at specific level */
90dcfb5e
DW
1055static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1056 unsigned long pfn,
6dd9a7c7 1057 int level, int *large_page)
ba395927
KA
1058{
1059 struct dma_pte *parent, *pte = NULL;
1060 int total = agaw_to_level(domain->agaw);
1061 int offset;
1062
1063 parent = domain->pgd;
1064 while (level <= total) {
90dcfb5e 1065 offset = pfn_level_offset(pfn, total);
ba395927
KA
1066 pte = &parent[offset];
1067 if (level == total)
1068 return pte;
1069
6dd9a7c7
YS
1070 if (!dma_pte_present(pte)) {
1071 *large_page = total;
ba395927 1072 break;
6dd9a7c7
YS
1073 }
1074
e16922af 1075 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1076 *large_page = total;
1077 return pte;
1078 }
1079
19c239ce 1080 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1081 total--;
1082 }
1083 return NULL;
1084}
1085
ba395927 1086/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1087static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1088 unsigned long start_pfn,
1089 unsigned long last_pfn)
ba395927 1090{
6dd9a7c7 1091 unsigned int large_page = 1;
310a5ab9 1092 struct dma_pte *first_pte, *pte;
66eae846 1093
162d1b10
JL
1094 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1095 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1096 BUG_ON(start_pfn > last_pfn);
ba395927 1097
04b18e65 1098 /* we don't need lock here; nobody else touches the iova range */
59c36286 1099 do {
6dd9a7c7
YS
1100 large_page = 1;
1101 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1102 if (!pte) {
6dd9a7c7 1103 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1104 continue;
1105 }
6dd9a7c7 1106 do {
310a5ab9 1107 dma_clear_pte(pte);
6dd9a7c7 1108 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1109 pte++;
75e6bf96
DW
1110 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1111
310a5ab9
DW
1112 domain_flush_cache(domain, first_pte,
1113 (void *)pte - (void *)first_pte);
59c36286
DW
1114
1115 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1116}
1117
3269ee0b
AW
1118static void dma_pte_free_level(struct dmar_domain *domain, int level,
1119 struct dma_pte *pte, unsigned long pfn,
1120 unsigned long start_pfn, unsigned long last_pfn)
1121{
1122 pfn = max(start_pfn, pfn);
1123 pte = &pte[pfn_level_offset(pfn, level)];
1124
1125 do {
1126 unsigned long level_pfn;
1127 struct dma_pte *level_pte;
1128
1129 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1130 goto next;
1131
1132 level_pfn = pfn & level_mask(level - 1);
1133 level_pte = phys_to_virt(dma_pte_addr(pte));
1134
1135 if (level > 2)
1136 dma_pte_free_level(domain, level - 1, level_pte,
1137 level_pfn, start_pfn, last_pfn);
1138
1139 /* If range covers entire pagetable, free it */
1140 if (!(start_pfn > level_pfn ||
08336fd2 1141 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1142 dma_clear_pte(pte);
1143 domain_flush_cache(domain, pte, sizeof(*pte));
1144 free_pgtable_page(level_pte);
1145 }
1146next:
1147 pfn += level_size(level);
1148 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1149}
1150
ba395927
KA
1151/* free page table pages. last level pte should already be cleared */
1152static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
1153 unsigned long start_pfn,
1154 unsigned long last_pfn)
ba395927 1155{
162d1b10
JL
1156 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1157 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1158 BUG_ON(start_pfn > last_pfn);
ba395927 1159
d41a4adb
JL
1160 dma_pte_clear_range(domain, start_pfn, last_pfn);
1161
f3a0a52f 1162 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1163 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1164 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1165
ba395927 1166 /* free pgd */
d794dc9b 1167 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1168 free_pgtable_page(domain->pgd);
1169 domain->pgd = NULL;
1170 }
1171}
1172
ea8ea460
DW
1173/* When a page at a given level is being unlinked from its parent, we don't
1174 need to *modify* it at all. All we need to do is make a list of all the
1175 pages which can be freed just as soon as we've flushed the IOTLB and we
1176 know the hardware page-walk will no longer touch them.
1177 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1178 be freed. */
1179static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1180 int level, struct dma_pte *pte,
1181 struct page *freelist)
1182{
1183 struct page *pg;
1184
1185 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1186 pg->freelist = freelist;
1187 freelist = pg;
1188
1189 if (level == 1)
1190 return freelist;
1191
adeb2590
JL
1192 pte = page_address(pg);
1193 do {
ea8ea460
DW
1194 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1195 freelist = dma_pte_list_pagetables(domain, level - 1,
1196 pte, freelist);
adeb2590
JL
1197 pte++;
1198 } while (!first_pte_in_page(pte));
ea8ea460
DW
1199
1200 return freelist;
1201}
1202
1203static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1204 struct dma_pte *pte, unsigned long pfn,
1205 unsigned long start_pfn,
1206 unsigned long last_pfn,
1207 struct page *freelist)
1208{
1209 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1210
1211 pfn = max(start_pfn, pfn);
1212 pte = &pte[pfn_level_offset(pfn, level)];
1213
1214 do {
1215 unsigned long level_pfn;
1216
1217 if (!dma_pte_present(pte))
1218 goto next;
1219
1220 level_pfn = pfn & level_mask(level);
1221
1222 /* If range covers entire pagetable, free it */
1223 if (start_pfn <= level_pfn &&
1224 last_pfn >= level_pfn + level_size(level) - 1) {
1225 /* These subordinate page tables are going away entirely. Don't
1226 bother to clear them; we're just going to *free* them. */
1227 if (level > 1 && !dma_pte_superpage(pte))
1228 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1229
1230 dma_clear_pte(pte);
1231 if (!first_pte)
1232 first_pte = pte;
1233 last_pte = pte;
1234 } else if (level > 1) {
1235 /* Recurse down into a level that isn't *entirely* obsolete */
1236 freelist = dma_pte_clear_level(domain, level - 1,
1237 phys_to_virt(dma_pte_addr(pte)),
1238 level_pfn, start_pfn, last_pfn,
1239 freelist);
1240 }
1241next:
1242 pfn += level_size(level);
1243 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1244
1245 if (first_pte)
1246 domain_flush_cache(domain, first_pte,
1247 (void *)++last_pte - (void *)first_pte);
1248
1249 return freelist;
1250}
1251
1252/* We can't just free the pages because the IOMMU may still be walking
1253 the page tables, and may have cached the intermediate levels. The
1254 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1255static struct page *domain_unmap(struct dmar_domain *domain,
1256 unsigned long start_pfn,
1257 unsigned long last_pfn)
ea8ea460 1258{
ea8ea460
DW
1259 struct page *freelist = NULL;
1260
162d1b10
JL
1261 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1262 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1263 BUG_ON(start_pfn > last_pfn);
1264
1265 /* we don't need lock here; nobody else touches the iova range */
1266 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1267 domain->pgd, 0, start_pfn, last_pfn, NULL);
1268
1269 /* free pgd */
1270 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1271 struct page *pgd_page = virt_to_page(domain->pgd);
1272 pgd_page->freelist = freelist;
1273 freelist = pgd_page;
1274
1275 domain->pgd = NULL;
1276 }
1277
1278 return freelist;
1279}
1280
b690420a 1281static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1282{
1283 struct page *pg;
1284
1285 while ((pg = freelist)) {
1286 freelist = pg->freelist;
1287 free_pgtable_page(page_address(pg));
1288 }
1289}
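/*
 * Editor's sketch of the intended ordering (illustrative only): callers
 * are expected to collect the page-table pages first, flush, and only
 * then hand the list back to the allocator, e.g.
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, domain, start_pfn,
 *			      last_pfn - start_pfn + 1, 0, 0);
 *	dma_free_pagelist(freelist);
 *
 * Freeing before the IOTLB flush would let the hardware walk pages that
 * have already been reused.
 */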
1290
ba395927
KA
1291/* iommu handling */
1292static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1293{
1294 struct root_entry *root;
1295 unsigned long flags;
1296
4c923d47 1297 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1298 if (!root) {
9f10e5bf 1299 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1300 iommu->name);
ba395927 1301 return -ENOMEM;
ffebeb46 1302 }
ba395927 1303
5b6985ce 1304 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1305
1306 spin_lock_irqsave(&iommu->lock, flags);
1307 iommu->root_entry = root;
1308 spin_unlock_irqrestore(&iommu->lock, flags);
1309
1310 return 0;
1311}
1312
ba395927
KA
1313static void iommu_set_root_entry(struct intel_iommu *iommu)
1314{
03ecc32c 1315 u64 addr;
c416daa9 1316 u32 sts;
ba395927
KA
1317 unsigned long flag;
1318
03ecc32c 1319 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1320 if (ecs_enabled(iommu))
03ecc32c 1321 addr |= DMA_RTADDR_RTT;
ba395927 1322
1f5b3c3f 1323 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1324 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1325
c416daa9 1326 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1327
1328 /* Make sure hardware complete it */
1329 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1330 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1331
1f5b3c3f 1332 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1333}
1334
1335static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1336{
1337 u32 val;
1338 unsigned long flag;
1339
9af88143 1340 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1341 return;
ba395927 1342
1f5b3c3f 1343 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1344 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1345
1346 /* Make sure hardware complete it */
1347 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1348 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1349
1f5b3c3f 1350 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1351}
1352
1353/* return value determines whether we need a write buffer flush */
4c25a2c1
DW
1354static void __iommu_flush_context(struct intel_iommu *iommu,
1355 u16 did, u16 source_id, u8 function_mask,
1356 u64 type)
ba395927
KA
1357{
1358 u64 val = 0;
1359 unsigned long flag;
1360
ba395927
KA
1361 switch (type) {
1362 case DMA_CCMD_GLOBAL_INVL:
1363 val = DMA_CCMD_GLOBAL_INVL;
1364 break;
1365 case DMA_CCMD_DOMAIN_INVL:
1366 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1367 break;
1368 case DMA_CCMD_DEVICE_INVL:
1369 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1370 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1371 break;
1372 default:
1373 BUG();
1374 }
1375 val |= DMA_CCMD_ICC;
1376
1f5b3c3f 1377 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1378 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1379
1380 /* Make sure hardware complete it */
1381 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1382 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1383
1f5b3c3f 1384 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1385}
1386
ba395927 1387/* return value determines whether we need a write buffer flush */
1f0ef2aa
DW
1388static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1389 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1390{
1391 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1392 u64 val = 0, val_iva = 0;
1393 unsigned long flag;
1394
ba395927
KA
1395 switch (type) {
1396 case DMA_TLB_GLOBAL_FLUSH:
1397 /* global flush doesn't need set IVA_REG */
1398 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1399 break;
1400 case DMA_TLB_DSI_FLUSH:
1401 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1402 break;
1403 case DMA_TLB_PSI_FLUSH:
1404 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1405 /* IH bit is passed in as part of address */
ba395927
KA
1406 val_iva = size_order | addr;
1407 break;
1408 default:
1409 BUG();
1410 }
1411 /* Note: set drain read/write */
1412#if 0
1413 /*
1414 * This is probably meant to be extra safe. Looks like we can
1415 * ignore it without any impact.
1416 */
1417 if (cap_read_drain(iommu->cap))
1418 val |= DMA_TLB_READ_DRAIN;
1419#endif
1420 if (cap_write_drain(iommu->cap))
1421 val |= DMA_TLB_WRITE_DRAIN;
1422
1f5b3c3f 1423 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1424 /* Note: Only uses first TLB reg currently */
1425 if (val_iva)
1426 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1427 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1428
1429 /* Make sure hardware complete it */
1430 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1431 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1432
1f5b3c3f 1433 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1434
1435 /* check IOTLB invalidation granularity */
1436 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1437 pr_err("Flush IOTLB failed\n");
ba395927 1438 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1439 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1440 (unsigned long long)DMA_TLB_IIRG(type),
1441 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1442}
1443
64ae892b
DW
1444static struct device_domain_info *
1445iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1446 u8 bus, u8 devfn)
93a23a72 1447{
93a23a72 1448 struct device_domain_info *info;
93a23a72 1449
55d94043
JR
1450 assert_spin_locked(&device_domain_lock);
1451
93a23a72
YZ
1452 if (!iommu->qi)
1453 return NULL;
1454
93a23a72 1455 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1456 if (info->iommu == iommu && info->bus == bus &&
1457 info->devfn == devfn) {
b16d0cb9
DW
1458 if (info->ats_supported && info->dev)
1459 return info;
93a23a72
YZ
1460 break;
1461 }
93a23a72 1462
b16d0cb9 1463 return NULL;
93a23a72
YZ
1464}
1465
1466static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1467{
fb0cc3aa
BH
1468 struct pci_dev *pdev;
1469
0bcb3e28 1470 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1471 return;
1472
fb0cc3aa 1473 pdev = to_pci_dev(info->dev);
fb0cc3aa 1474
b16d0cb9
DW
1475#ifdef CONFIG_INTEL_IOMMU_SVM
1476 /* The PCIe spec, in its wisdom, declares that the behaviour of
1477 the device if you enable PASID support after ATS support is
1478 undefined. So always enable PASID support on devices which
1479 have it, even if we can't yet know if we're ever going to
1480 use it. */
1481 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1482 info->pasid_enabled = 1;
1483
1484 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1485 info->pri_enabled = 1;
1486#endif
1487 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1488 info->ats_enabled = 1;
1489 info->ats_qdep = pci_ats_queue_depth(pdev);
1490 }
93a23a72
YZ
1491}
1492
1493static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1494{
b16d0cb9
DW
1495 struct pci_dev *pdev;
1496
da972fb1 1497 if (!dev_is_pci(info->dev))
93a23a72
YZ
1498 return;
1499
b16d0cb9
DW
1500 pdev = to_pci_dev(info->dev);
1501
1502 if (info->ats_enabled) {
1503 pci_disable_ats(pdev);
1504 info->ats_enabled = 0;
1505 }
1506#ifdef CONFIG_INTEL_IOMMU_SVM
1507 if (info->pri_enabled) {
1508 pci_disable_pri(pdev);
1509 info->pri_enabled = 0;
1510 }
1511 if (info->pasid_enabled) {
1512 pci_disable_pasid(pdev);
1513 info->pasid_enabled = 0;
1514 }
1515#endif
93a23a72
YZ
1516}
1517
1518static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1519 u64 addr, unsigned mask)
1520{
1521 u16 sid, qdep;
1522 unsigned long flags;
1523 struct device_domain_info *info;
1524
1525 spin_lock_irqsave(&device_domain_lock, flags);
1526 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1527 if (!info->ats_enabled)
93a23a72
YZ
1528 continue;
1529
1530 sid = info->bus << 8 | info->devfn;
b16d0cb9 1531 qdep = info->ats_qdep;
93a23a72
YZ
1532 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1533 }
1534 spin_unlock_irqrestore(&device_domain_lock, flags);
1535}
1536
a1ddcbe9
JR
1537static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1538 struct dmar_domain *domain,
1539 unsigned long pfn, unsigned int pages,
1540 int ih, int map)
ba395927 1541{
9dd2fe89 1542 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1543 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1544 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1545
ba395927
KA
1546 BUG_ON(pages == 0);
1547
ea8ea460
DW
1548 if (ih)
1549 ih = 1 << 6;
ba395927 1550 /*
9dd2fe89
YZ
1551 * Fallback to domain selective flush if no PSI support or the size is
1552 * too big.
ba395927
KA
1553 * PSI requires page size to be 2 ^ x, and the base address is naturally
1554 * aligned to the size
1555 */
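	/*
	 * Editor's worked example (not in the original file): pages == 5
	 * rounds up to 8, so mask == 3 and the PSI covers eight aligned
	 * 4KiB VT-d pages (32KiB) starting at addr.
	 */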
9dd2fe89
YZ
1556 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1557 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1558 DMA_TLB_DSI_FLUSH);
9dd2fe89 1559 else
ea8ea460 1560 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1561 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1562
1563 /*
82653633
NA
1564 * In caching mode, changes of pages from non-present to present require
1565 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1566 */
82653633 1567 if (!cap_caching_mode(iommu->cap) || !map)
9452d5bf
JR
1568 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1569 addr, mask);
ba395927
KA
1570}
1571
f8bab735 1572static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1573{
1574 u32 pmen;
1575 unsigned long flags;
1576
1f5b3c3f 1577 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1578 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1579 pmen &= ~DMA_PMEN_EPM;
1580 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1581
1582 /* wait for the protected region status bit to clear */
1583 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1584 readl, !(pmen & DMA_PMEN_PRS), pmen);
1585
1f5b3c3f 1586 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1587}
1588
2a41ccee 1589static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1590{
1591 u32 sts;
1592 unsigned long flags;
1593
1f5b3c3f 1594 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1595 iommu->gcmd |= DMA_GCMD_TE;
1596 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1597
1598 /* Make sure hardware complete it */
1599 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1600 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1601
1f5b3c3f 1602 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1603}
1604
2a41ccee 1605static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1606{
1607 u32 sts;
1608 unsigned long flag;
1609
1f5b3c3f 1610 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1611 iommu->gcmd &= ~DMA_GCMD_TE;
1612 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1613
1614 /* Make sure hardware complete it */
1615 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1616 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1617
1f5b3c3f 1618 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1619}
1620
3460a6d9 1621
ba395927
KA
1622static int iommu_init_domains(struct intel_iommu *iommu)
1623{
8bf47816
JR
1624 u32 ndomains, nlongs;
1625 size_t size;
ba395927
KA
1626
1627 ndomains = cap_ndoms(iommu->cap);
8bf47816 1628 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1629 iommu->name, ndomains);
ba395927
KA
1630 nlongs = BITS_TO_LONGS(ndomains);
1631
94a91b50
DD
1632 spin_lock_init(&iommu->lock);
1633
ba395927
KA
1634 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1635 if (!iommu->domain_ids) {
9f10e5bf
JR
1636 pr_err("%s: Allocating domain id array failed\n",
1637 iommu->name);
ba395927
KA
1638 return -ENOMEM;
1639 }
8bf47816
JR
1640
1641 size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1642 iommu->domains = kzalloc(size, GFP_KERNEL);
1643
1644 if (iommu->domains) {
1645 size = 256 * sizeof(struct dmar_domain *);
1646 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1647 }
1648
1649 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1650 pr_err("%s: Allocating domain array failed\n",
1651 iommu->name);
852bdb04 1652 kfree(iommu->domain_ids);
8bf47816 1653 kfree(iommu->domains);
852bdb04 1654 iommu->domain_ids = NULL;
8bf47816 1655 iommu->domains = NULL;
ba395927
KA
1656 return -ENOMEM;
1657 }
1658
8bf47816
JR
1659
1660
ba395927 1661 /*
c0e8a6c8
JR
1662 * If Caching mode is set, then invalid translations are tagged
1663 * with domain-id 0, hence we need to pre-allocate it. We also
1664 * use domain-id 0 as a marker for non-allocated domain-id, so
1665 * make sure it is not used for a real domain.
ba395927 1666 */
c0e8a6c8
JR
1667 set_bit(0, iommu->domain_ids);
1668
ba395927
KA
1669 return 0;
1670}
ba395927 1671
ffebeb46 1672static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1673{
29a27719 1674 struct device_domain_info *info, *tmp;
55d94043 1675 unsigned long flags;
ba395927 1676
29a27719
JR
1677 if (!iommu->domains || !iommu->domain_ids)
1678 return;
a4eaa86c 1679
55d94043 1680 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1681 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1682 struct dmar_domain *domain;
1683
1684 if (info->iommu != iommu)
1685 continue;
1686
1687 if (!info->dev || !info->domain)
1688 continue;
1689
1690 domain = info->domain;
1691
e6de0f8d 1692 dmar_remove_one_dev_info(domain, info->dev);
29a27719
JR
1693
1694 if (!domain_type_is_vm_or_si(domain))
1695 domain_exit(domain);
ba395927 1696 }
55d94043 1697 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1698
1699 if (iommu->gcmd & DMA_GCMD_TE)
1700 iommu_disable_translation(iommu);
ffebeb46 1701}
ba395927 1702
ffebeb46
JL
1703static void free_dmar_iommu(struct intel_iommu *iommu)
1704{
1705 if ((iommu->domains) && (iommu->domain_ids)) {
8bf47816
JR
1706 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1707 int i;
1708
1709 for (i = 0; i < elems; i++)
1710 kfree(iommu->domains[i]);
ffebeb46
JL
1711 kfree(iommu->domains);
1712 kfree(iommu->domain_ids);
1713 iommu->domains = NULL;
1714 iommu->domain_ids = NULL;
1715 }
ba395927 1716
d9630fe9
WH
1717 g_iommus[iommu->seq_id] = NULL;
1718
ba395927
KA
1719 /* free context mapping */
1720 free_context_table(iommu);
8a94ade4
DW
1721
1722#ifdef CONFIG_INTEL_IOMMU_SVM
a222a7f0
DW
1723 if (pasid_enabled(iommu)) {
1724 if (ecap_prs(iommu->ecap))
1725 intel_svm_finish_prq(iommu);
8a94ade4 1726 intel_svm_free_pasid_tables(iommu);
a222a7f0 1727 }
8a94ade4 1728#endif
ba395927
KA
1729}
1730
ab8dfe25 1731static struct dmar_domain *alloc_domain(int flags)
ba395927 1732{
ba395927 1733 struct dmar_domain *domain;
ba395927
KA
1734
1735 domain = alloc_domain_mem();
1736 if (!domain)
1737 return NULL;
1738
ab8dfe25 1739 memset(domain, 0, sizeof(*domain));
4c923d47 1740 domain->nid = -1;
ab8dfe25 1741 domain->flags = flags;
92d03cc8 1742 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1743
1744 return domain;
1745}
1746
d160aca5
JR
1747/* Must be called with iommu->lock */
1748static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1749 struct intel_iommu *iommu)
1750{
44bde614 1751 unsigned long ndomains;
55d94043 1752 int num;
44bde614 1753
55d94043 1754 assert_spin_locked(&device_domain_lock);
d160aca5 1755 assert_spin_locked(&iommu->lock);
ba395927 1756
29a27719
JR
1757 domain->iommu_refcnt[iommu->seq_id] += 1;
1758 domain->iommu_count += 1;
1759 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1760 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1761 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1762
1763 if (num >= ndomains) {
1764 pr_err("%s: No free domain ids\n", iommu->name);
1765 domain->iommu_refcnt[iommu->seq_id] -= 1;
1766 domain->iommu_count -= 1;
55d94043 1767 return -ENOSPC;
2c2e2c38 1768 }
ba395927 1769
d160aca5
JR
1770 set_bit(num, iommu->domain_ids);
1771 set_iommu_domain(iommu, num, domain);
1772
1773 domain->iommu_did[iommu->seq_id] = num;
1774 domain->nid = iommu->node;
fb170fb4 1775
fb170fb4
JL
1776 domain_update_iommu_cap(domain);
1777 }
d160aca5 1778
55d94043 1779 return 0;
fb170fb4
JL
1780}
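/*
 * Usage sketch (illustrative): both locks named in the asserts above
 * are expected to be held by the caller, e.g. as dmar_insert_one_dev_info()
 * does further down:
 *
 *	spin_lock_irqsave(&device_domain_lock, flags);
 *	...
 *	spin_lock(&iommu->lock);
 *	ret = domain_attach_iommu(domain, iommu);
 *	spin_unlock(&iommu->lock);
 *	...
 *	spin_unlock_irqrestore(&device_domain_lock, flags);
 */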
1781
1782static int domain_detach_iommu(struct dmar_domain *domain,
1783 struct intel_iommu *iommu)
1784{
d160aca5 1785 int num, count = INT_MAX;
d160aca5 1786
55d94043 1787 assert_spin_locked(&device_domain_lock);
d160aca5 1788 assert_spin_locked(&iommu->lock);
fb170fb4 1789
29a27719
JR
1790 domain->iommu_refcnt[iommu->seq_id] -= 1;
1791 count = --domain->iommu_count;
1792 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1793 num = domain->iommu_did[iommu->seq_id];
1794 clear_bit(num, iommu->domain_ids);
1795 set_iommu_domain(iommu, num, NULL);
fb170fb4 1796
fb170fb4 1797 domain_update_iommu_cap(domain);
c0e8a6c8 1798 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1799 }
fb170fb4
JL
1800
1801 return count;
1802}
1803
ba395927 1804static struct iova_domain reserved_iova_list;
8a443df4 1805static struct lock_class_key reserved_rbtree_key;
ba395927 1806
51a63e67 1807static int dmar_init_reserved_ranges(void)
ba395927
KA
1808{
1809 struct pci_dev *pdev = NULL;
1810 struct iova *iova;
1811 int i;
ba395927 1812
0fb5fe87
RM
1813 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1814 DMA_32BIT_PFN);
ba395927 1815
8a443df4
MG
1816 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1817 &reserved_rbtree_key);
1818
ba395927
KA
1819 /* IOAPIC ranges shouldn't be accessed by DMA */
1820 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1821 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1822 if (!iova) {
9f10e5bf 1823 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1824 return -ENODEV;
1825 }
ba395927
KA
1826
1827 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1828 for_each_pci_dev(pdev) {
1829 struct resource *r;
1830
1831 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1832 r = &pdev->resource[i];
1833 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1834 continue;
1a4a4551
DW
1835 iova = reserve_iova(&reserved_iova_list,
1836 IOVA_PFN(r->start),
1837 IOVA_PFN(r->end));
51a63e67 1838 if (!iova) {
9f10e5bf 1839 pr_err("Reserve iova failed\n");
51a63e67
JC
1840 return -ENODEV;
1841 }
ba395927
KA
1842 }
1843 }
51a63e67 1844 return 0;
ba395927
KA
1845}
1846
1847static void domain_reserve_special_ranges(struct dmar_domain *domain)
1848{
1849 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1850}
1851
1852static inline int guestwidth_to_adjustwidth(int gaw)
1853{
1854 int agaw;
1855 int r = (gaw - 12) % 9;
1856
1857 if (r == 0)
1858 agaw = gaw;
1859 else
1860 agaw = gaw + 9 - r;
1861 if (agaw > 64)
1862 agaw = 64;
1863 return agaw;
1864}
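/*
 * Worked example (illustrative): the guest width is rounded up so that
 * (agaw - 12) is a whole number of 9-bit page-table levels:
 *
 *	gaw = 39:  r = (39 - 12) % 9 = 0  ->  agaw = 39  (3 levels)
 *	gaw = 40:  r = (40 - 12) % 9 = 1  ->  agaw = 40 + 9 - 1 = 48
 *	gaw = 48:  r = 0                  ->  agaw = 48  (4 levels)
 */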
1865
dc534b25
JR
1866static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1867 int guest_width)
ba395927 1868{
ba395927
KA
1869 int adjust_width, agaw;
1870 unsigned long sagaw;
1871
0fb5fe87
RM
1872 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1873 DMA_32BIT_PFN);
ba395927
KA
1874 domain_reserve_special_ranges(domain);
1875
1876 /* calculate AGAW */
ba395927
KA
1877 if (guest_width > cap_mgaw(iommu->cap))
1878 guest_width = cap_mgaw(iommu->cap);
1879 domain->gaw = guest_width;
1880 adjust_width = guestwidth_to_adjustwidth(guest_width);
1881 agaw = width_to_agaw(adjust_width);
1882 sagaw = cap_sagaw(iommu->cap);
1883 if (!test_bit(agaw, &sagaw)) {
1884 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1885 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1886 agaw = find_next_bit(&sagaw, 5, agaw);
1887 if (agaw >= 5)
1888 return -ENODEV;
1889 }
1890 domain->agaw = agaw;
ba395927 1891
8e604097
WH
1892 if (ecap_coherent(iommu->ecap))
1893 domain->iommu_coherency = 1;
1894 else
1895 domain->iommu_coherency = 0;
1896
58c610bd
SY
1897 if (ecap_sc_support(iommu->ecap))
1898 domain->iommu_snooping = 1;
1899 else
1900 domain->iommu_snooping = 0;
1901
214e39aa
DW
1902 if (intel_iommu_superpage)
1903 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1904 else
1905 domain->iommu_superpage = 0;
1906
4c923d47 1907 domain->nid = iommu->node;
c7151a8d 1908
ba395927 1909 /* always allocate the top pgd */
4c923d47 1910 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1911 if (!domain->pgd)
1912 return -ENOMEM;
5b6985ce 1913 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1914 return 0;
1915}
1916
1917static void domain_exit(struct dmar_domain *domain)
1918{
ea8ea460 1919 struct page *freelist = NULL;
ba395927
KA
1920
 1921 /* Domain 0 is reserved, so don't process it */
1922 if (!domain)
1923 return;
1924
7b668357 1925 /* Flush any lazy unmaps that may reference this domain */
aa473240
OP
1926 if (!intel_iommu_strict) {
1927 int cpu;
1928
1929 for_each_possible_cpu(cpu)
1930 flush_unmaps_timeout(cpu);
1931 }
7b668357 1932
d160aca5
JR
1933 /* Remove associated devices and clear attached or cached domains */
1934 rcu_read_lock();
ba395927 1935 domain_remove_dev_info(domain);
d160aca5 1936 rcu_read_unlock();
92d03cc8 1937
ba395927
KA
1938 /* destroy iovas */
1939 put_iova_domain(&domain->iovad);
ba395927 1940
ea8ea460 1941 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1942
ea8ea460
DW
1943 dma_free_pagelist(freelist);
1944
ba395927
KA
1945 free_domain_mem(domain);
1946}
1947
64ae892b
DW
1948static int domain_context_mapping_one(struct dmar_domain *domain,
1949 struct intel_iommu *iommu,
28ccce0d 1950 u8 bus, u8 devfn)
ba395927 1951{
c6c2cebd 1952 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1953 int translation = CONTEXT_TT_MULTI_LEVEL;
1954 struct device_domain_info *info = NULL;
ba395927 1955 struct context_entry *context;
ba395927 1956 unsigned long flags;
ea6606b0 1957 struct dma_pte *pgd;
55d94043 1958 int ret, agaw;
28ccce0d 1959
c6c2cebd
JR
1960 WARN_ON(did == 0);
1961
28ccce0d
JR
1962 if (hw_pass_through && domain_type_is_si(domain))
1963 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1964
1965 pr_debug("Set context mapping for %02x:%02x.%d\n",
1966 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1967
ba395927 1968 BUG_ON(!domain->pgd);
5331fe6f 1969
55d94043
JR
1970 spin_lock_irqsave(&device_domain_lock, flags);
1971 spin_lock(&iommu->lock);
1972
1973 ret = -ENOMEM;
03ecc32c 1974 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1975 if (!context)
55d94043 1976 goto out_unlock;
ba395927 1977
55d94043
JR
1978 ret = 0;
1979 if (context_present(context))
1980 goto out_unlock;
cf484d0e 1981
ea6606b0
WH
1982 pgd = domain->pgd;
1983
de24e553 1984 context_clear_entry(context);
c6c2cebd 1985 context_set_domain_id(context, did);
ea6606b0 1986
de24e553
JR
1987 /*
 1988 * Skip top levels of page tables for an iommu which has a smaller
 1989 * agaw than the default. Unnecessary for PT mode.
1990 */
93a23a72 1991 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 1992 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 1993 ret = -ENOMEM;
de24e553 1994 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
1995 if (!dma_pte_present(pgd))
1996 goto out_unlock;
ea6606b0 1997 }
4ed0d3e6 1998
64ae892b 1999 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
2000 if (info && info->ats_supported)
2001 translation = CONTEXT_TT_DEV_IOTLB;
2002 else
2003 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 2004
93a23a72
YZ
2005 context_set_address_root(context, virt_to_phys(pgd));
2006 context_set_address_width(context, iommu->agaw);
de24e553
JR
2007 } else {
2008 /*
2009 * In pass through mode, AW must be programmed to
2010 * indicate the largest AGAW value supported by
2011 * hardware. And ASR is ignored by hardware.
2012 */
2013 context_set_address_width(context, iommu->msagaw);
93a23a72 2014 }
4ed0d3e6
FY
2015
2016 context_set_translation_type(context, translation);
c07e7d21
MM
2017 context_set_fault_enable(context);
2018 context_set_present(context);
5331fe6f 2019 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2020
4c25a2c1
DW
2021 /*
2022 * It's a non-present to present mapping. If hardware doesn't cache
 2023 * non-present entries we only need to flush the write-buffer. If it
 2024 * _does_ cache non-present entries, then it does so in the special
2025 * domain #0, which we have to flush:
2026 */
2027 if (cap_caching_mode(iommu->cap)) {
2028 iommu->flush.flush_context(iommu, 0,
2029 (((u16)bus) << 8) | devfn,
2030 DMA_CCMD_MASK_NOBIT,
2031 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2032 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2033 } else {
ba395927 2034 iommu_flush_write_buffer(iommu);
4c25a2c1 2035 }
93a23a72 2036 iommu_enable_dev_iotlb(info);
c7151a8d 2037
55d94043
JR
2038 ret = 0;
2039
2040out_unlock:
2041 spin_unlock(&iommu->lock);
2042 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2043
ba395927
KA
 2044 return ret;
2045}
2046
579305f7
AW
2047struct domain_context_mapping_data {
2048 struct dmar_domain *domain;
2049 struct intel_iommu *iommu;
579305f7
AW
2050};
2051
2052static int domain_context_mapping_cb(struct pci_dev *pdev,
2053 u16 alias, void *opaque)
2054{
2055 struct domain_context_mapping_data *data = opaque;
2056
2057 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2058 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2059}
2060
ba395927 2061static int
28ccce0d 2062domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2063{
64ae892b 2064 struct intel_iommu *iommu;
156baca8 2065 u8 bus, devfn;
579305f7 2066 struct domain_context_mapping_data data;
64ae892b 2067
e1f167f3 2068 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2069 if (!iommu)
2070 return -ENODEV;
ba395927 2071
579305f7 2072 if (!dev_is_pci(dev))
28ccce0d 2073 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2074
2075 data.domain = domain;
2076 data.iommu = iommu;
579305f7
AW
2077
2078 return pci_for_each_dma_alias(to_pci_dev(dev),
2079 &domain_context_mapping_cb, &data);
2080}
2081
2082static int domain_context_mapped_cb(struct pci_dev *pdev,
2083 u16 alias, void *opaque)
2084{
2085 struct intel_iommu *iommu = opaque;
2086
2087 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2088}
2089
e1f167f3 2090static int domain_context_mapped(struct device *dev)
ba395927 2091{
5331fe6f 2092 struct intel_iommu *iommu;
156baca8 2093 u8 bus, devfn;
5331fe6f 2094
e1f167f3 2095 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2096 if (!iommu)
2097 return -ENODEV;
ba395927 2098
579305f7
AW
2099 if (!dev_is_pci(dev))
2100 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2101
579305f7
AW
2102 return !pci_for_each_dma_alias(to_pci_dev(dev),
2103 domain_context_mapped_cb, iommu);
ba395927
KA
2104}
2105
f532959b
FY
2106/* Returns a number of VTD pages, but aligned to MM page size */
2107static inline unsigned long aligned_nrpages(unsigned long host_addr,
2108 size_t size)
2109{
2110 host_addr &= ~PAGE_MASK;
2111 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2112}
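/*
 * Worked example (illustrative, 4KiB pages): a 0x2000-byte buffer that
 * starts 3 bytes into a page spans three pages:
 *
 *	host_addr & ~PAGE_MASK     = 0x003
 *	PAGE_ALIGN(0x003 + 0x2000) = 0x3000
 *	0x3000 >> VTD_PAGE_SHIFT   = 3
 */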
2113
6dd9a7c7
YS
2114/* Return largest possible superpage level for a given mapping */
2115static inline int hardware_largepage_caps(struct dmar_domain *domain,
2116 unsigned long iov_pfn,
2117 unsigned long phy_pfn,
2118 unsigned long pages)
2119{
2120 int support, level = 1;
2121 unsigned long pfnmerge;
2122
2123 support = domain->iommu_superpage;
2124
2125 /* To use a large page, the virtual *and* physical addresses
2126 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2127 of them will mean we have to use smaller pages. So just
2128 merge them and check both at once. */
2129 pfnmerge = iov_pfn | phy_pfn;
2130
2131 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2132 pages >>= VTD_STRIDE_SHIFT;
2133 if (!pages)
2134 break;
2135 pfnmerge >>= VTD_STRIDE_SHIFT;
2136 level++;
2137 support--;
2138 }
2139 return level;
2140}
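/*
 * Worked example (illustrative): with domain->iommu_superpage == 2,
 * iov_pfn = 0x200, phy_pfn = 0x5400 and pages = 0x400:
 *
 *	pfnmerge = 0x5600, low 9 bits clear  ->  first pass taken
 *	pages >>= 9 gives 2, pfnmerge >>= 9 gives 0x2b (low bits set)
 *
 * so the loop stops after one pass and level 2 is returned, i.e. a
 * 2MiB superpage can be used for this run.
 */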
2141
9051aa02
DW
2142static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2143 struct scatterlist *sg, unsigned long phys_pfn,
2144 unsigned long nr_pages, int prot)
e1605495
DW
2145{
2146 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2147 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2148 unsigned long sg_res = 0;
6dd9a7c7
YS
2149 unsigned int largepage_lvl = 0;
2150 unsigned long lvl_pages = 0;
e1605495 2151
162d1b10 2152 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2153
2154 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2155 return -EINVAL;
2156
2157 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2158
cc4f14aa
JL
2159 if (!sg) {
2160 sg_res = nr_pages;
9051aa02
DW
2161 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2162 }
2163
6dd9a7c7 2164 while (nr_pages > 0) {
c85994e4
DW
2165 uint64_t tmp;
2166
e1605495 2167 if (!sg_res) {
f532959b 2168 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2169 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2170 sg->dma_length = sg->length;
3e6110fd 2171 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2172 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2173 }
6dd9a7c7 2174
e1605495 2175 if (!pte) {
6dd9a7c7
YS
2176 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2177
5cf0a76f 2178 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2179 if (!pte)
2180 return -ENOMEM;
6dd9a7c7 2181 /* It is large page*/
6491d4d0 2182 if (largepage_lvl > 1) {
ba2374fd
CZ
2183 unsigned long nr_superpages, end_pfn;
2184
6dd9a7c7 2185 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2186 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2187
2188 nr_superpages = sg_res / lvl_pages;
2189 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2190
d41a4adb
JL
2191 /*
2192 * Ensure that old small page tables are
ba2374fd 2193 * removed to make room for superpage(s).
d41a4adb 2194 */
ba2374fd 2195 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
6491d4d0 2196 } else {
6dd9a7c7 2197 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2198 }
6dd9a7c7 2199
e1605495
DW
2200 }
2201 /* We don't need lock here, nobody else
2202 * touches the iova range
2203 */
7766a3fb 2204 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2205 if (tmp) {
1bf20f0d 2206 static int dumps = 5;
9f10e5bf
JR
2207 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2208 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2209 if (dumps) {
2210 dumps--;
2211 debug_dma_dump_mappings(NULL);
2212 }
2213 WARN_ON(1);
2214 }
6dd9a7c7
YS
2215
2216 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2217
2218 BUG_ON(nr_pages < lvl_pages);
2219 BUG_ON(sg_res < lvl_pages);
2220
2221 nr_pages -= lvl_pages;
2222 iov_pfn += lvl_pages;
2223 phys_pfn += lvl_pages;
2224 pteval += lvl_pages * VTD_PAGE_SIZE;
2225 sg_res -= lvl_pages;
2226
2227 /* If the next PTE would be the first in a new page, then we
2228 need to flush the cache on the entries we've just written.
2229 And then we'll need to recalculate 'pte', so clear it and
2230 let it get set again in the if (!pte) block above.
2231
2232 If we're done (!nr_pages) we need to flush the cache too.
2233
2234 Also if we've been setting superpages, we may need to
2235 recalculate 'pte' and switch back to smaller pages for the
2236 end of the mapping, if the trailing size is not enough to
2237 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2238 pte++;
6dd9a7c7
YS
2239 if (!nr_pages || first_pte_in_page(pte) ||
2240 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2241 domain_flush_cache(domain, first_pte,
2242 (void *)pte - (void *)first_pte);
2243 pte = NULL;
2244 }
6dd9a7c7
YS
2245
2246 if (!sg_res && nr_pages)
e1605495
DW
2247 sg = sg_next(sg);
2248 }
2249 return 0;
2250}
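/*
 * Sizing note (illustrative): a dma_pte is 8 bytes, so one 4KiB
 * page-table page holds 512 entries.  For a long run of 4KiB mappings
 * the domain_flush_cache() above therefore fires roughly once per 512
 * PTEs (at page-table page boundaries) plus once at the end of the
 * range, rather than once per PTE.
 */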
2251
9051aa02
DW
2252static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2253 struct scatterlist *sg, unsigned long nr_pages,
2254 int prot)
ba395927 2255{
9051aa02
DW
2256 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2257}
6f6a00e4 2258
9051aa02
DW
2259static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2260 unsigned long phys_pfn, unsigned long nr_pages,
2261 int prot)
2262{
2263 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2264}
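/*
 * Usage sketch (illustrative): a 1:1 mapping of a pfn range, as
 * iommu_domain_identity_map() does below, boils down to
 *
 *	domain_pfn_mapping(domain, first_vpfn, first_vpfn,
 *			   last_vpfn - first_vpfn + 1,
 *			   DMA_PTE_READ | DMA_PTE_WRITE);
 */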
2265
2452d9db 2266static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2267{
c7151a8d
WH
2268 if (!iommu)
2269 return;
8c11e798
WH
2270
2271 clear_context_table(iommu, bus, devfn);
2272 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2273 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2274 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2275}
2276
109b9b04
DW
2277static inline void unlink_domain_info(struct device_domain_info *info)
2278{
2279 assert_spin_locked(&device_domain_lock);
2280 list_del(&info->link);
2281 list_del(&info->global);
2282 if (info->dev)
0bcb3e28 2283 info->dev->archdata.iommu = NULL;
109b9b04
DW
2284}
2285
ba395927
KA
2286static void domain_remove_dev_info(struct dmar_domain *domain)
2287{
3a74ca01 2288 struct device_domain_info *info, *tmp;
fb170fb4 2289 unsigned long flags;
ba395927
KA
2290
2291 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2292 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2293 __dmar_remove_one_dev_info(info);
ba395927
KA
2294 spin_unlock_irqrestore(&device_domain_lock, flags);
2295}
2296
2297/*
2298 * find_domain
1525a29a 2299 * Note: we use struct device->archdata.iommu to store the info
ba395927 2300 */
1525a29a 2301static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2302{
2303 struct device_domain_info *info;
2304
2305 /* No lock here, assumes no domain exit in normal case */
1525a29a 2306 info = dev->archdata.iommu;
ba395927
KA
2307 if (info)
2308 return info->domain;
2309 return NULL;
2310}
2311
5a8f40e8 2312static inline struct device_domain_info *
745f2586
JL
2313dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2314{
2315 struct device_domain_info *info;
2316
2317 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2318 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2319 info->devfn == devfn)
5a8f40e8 2320 return info;
745f2586
JL
2321
2322 return NULL;
2323}
2324
5db31569
JR
2325static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2326 int bus, int devfn,
2327 struct device *dev,
2328 struct dmar_domain *domain)
745f2586 2329{
5a8f40e8 2330 struct dmar_domain *found = NULL;
745f2586
JL
2331 struct device_domain_info *info;
2332 unsigned long flags;
d160aca5 2333 int ret;
745f2586
JL
2334
2335 info = alloc_devinfo_mem();
2336 if (!info)
b718cd3d 2337 return NULL;
745f2586 2338
745f2586
JL
2339 info->bus = bus;
2340 info->devfn = devfn;
b16d0cb9
DW
2341 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2342 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2343 info->ats_qdep = 0;
745f2586
JL
2344 info->dev = dev;
2345 info->domain = domain;
5a8f40e8 2346 info->iommu = iommu;
745f2586 2347
b16d0cb9
DW
2348 if (dev && dev_is_pci(dev)) {
2349 struct pci_dev *pdev = to_pci_dev(info->dev);
2350
2351 if (ecap_dev_iotlb_support(iommu->ecap) &&
2352 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2353 dmar_find_matched_atsr_unit(pdev))
2354 info->ats_supported = 1;
2355
2356 if (ecs_enabled(iommu)) {
2357 if (pasid_enabled(iommu)) {
2358 int features = pci_pasid_features(pdev);
2359 if (features >= 0)
2360 info->pasid_supported = features | 1;
2361 }
2362
2363 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2364 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2365 info->pri_supported = 1;
2366 }
2367 }
2368
745f2586
JL
2369 spin_lock_irqsave(&device_domain_lock, flags);
2370 if (dev)
0bcb3e28 2371 found = find_domain(dev);
f303e507
JR
2372
2373 if (!found) {
5a8f40e8 2374 struct device_domain_info *info2;
41e80dca 2375 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2376 if (info2) {
2377 found = info2->domain;
2378 info2->dev = dev;
2379 }
5a8f40e8 2380 }
f303e507 2381
745f2586
JL
2382 if (found) {
2383 spin_unlock_irqrestore(&device_domain_lock, flags);
2384 free_devinfo_mem(info);
b718cd3d
DW
2385 /* Caller must free the original domain */
2386 return found;
745f2586
JL
2387 }
2388
d160aca5
JR
2389 spin_lock(&iommu->lock);
2390 ret = domain_attach_iommu(domain, iommu);
2391 spin_unlock(&iommu->lock);
2392
2393 if (ret) {
c6c2cebd 2394 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2395 free_devinfo_mem(info);
c6c2cebd
JR
2396 return NULL;
2397 }
c6c2cebd 2398
b718cd3d
DW
2399 list_add(&info->link, &domain->devices);
2400 list_add(&info->global, &device_domain_list);
2401 if (dev)
2402 dev->archdata.iommu = info;
2403 spin_unlock_irqrestore(&device_domain_lock, flags);
2404
cc4e2575
JR
2405 if (dev && domain_context_mapping(domain, dev)) {
2406 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2407 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2408 return NULL;
2409 }
2410
b718cd3d 2411 return domain;
745f2586
JL
2412}
2413
579305f7
AW
2414static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2415{
2416 *(u16 *)opaque = alias;
2417 return 0;
2418}
2419
ba395927 2420/* domain is initialized */
146922ec 2421static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2422{
cc4e2575 2423 struct device_domain_info *info = NULL;
579305f7
AW
2424 struct dmar_domain *domain, *tmp;
2425 struct intel_iommu *iommu;
08a7f456 2426 u16 req_id, dma_alias;
ba395927 2427 unsigned long flags;
aa4d066a 2428 u8 bus, devfn;
ba395927 2429
146922ec 2430 domain = find_domain(dev);
ba395927
KA
2431 if (domain)
2432 return domain;
2433
579305f7
AW
2434 iommu = device_to_iommu(dev, &bus, &devfn);
2435 if (!iommu)
2436 return NULL;
2437
08a7f456
JR
2438 req_id = ((u16)bus << 8) | devfn;
2439
146922ec
DW
2440 if (dev_is_pci(dev)) {
2441 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2442
579305f7
AW
2443 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2444
2445 spin_lock_irqsave(&device_domain_lock, flags);
2446 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2447 PCI_BUS_NUM(dma_alias),
2448 dma_alias & 0xff);
2449 if (info) {
2450 iommu = info->iommu;
2451 domain = info->domain;
5a8f40e8 2452 }
579305f7 2453 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2454
579305f7
AW
 2455 /* DMA alias already has a domain, use it */
2456 if (info)
2457 goto found_domain;
2458 }
ba395927 2459
146922ec 2460 /* Allocate and initialize new domain for the device */
ab8dfe25 2461 domain = alloc_domain(0);
745f2586 2462 if (!domain)
579305f7 2463 return NULL;
dc534b25 2464 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2465 domain_exit(domain);
2466 return NULL;
2c2e2c38 2467 }
ba395927 2468
579305f7 2469 /* register PCI DMA alias device */
0b74ecdf 2470 if (dev_is_pci(dev) && req_id != dma_alias) {
5db31569
JR
2471 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2472 dma_alias & 0xff, NULL, domain);
579305f7
AW
2473
2474 if (!tmp || tmp != domain) {
2475 domain_exit(domain);
2476 domain = tmp;
2477 }
2478
b718cd3d 2479 if (!domain)
579305f7 2480 return NULL;
ba395927
KA
2481 }
2482
2483found_domain:
5db31569 2484 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
579305f7
AW
2485
2486 if (!tmp || tmp != domain) {
2487 domain_exit(domain);
2488 domain = tmp;
2489 }
b718cd3d
DW
2490
2491 return domain;
ba395927
KA
2492}
2493
b213203e
DW
2494static int iommu_domain_identity_map(struct dmar_domain *domain,
2495 unsigned long long start,
2496 unsigned long long end)
ba395927 2497{
c5395d5c
DW
2498 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2499 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2500
2501 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2502 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2503 pr_err("Reserving iova failed\n");
b213203e 2504 return -ENOMEM;
ba395927
KA
2505 }
2506
af1089ce 2507 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2508 /*
2509 * RMRR range might have overlap with physical memory range,
2510 * clear it first
2511 */
c5395d5c 2512 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2513
c5395d5c
DW
2514 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2515 last_vpfn - first_vpfn + 1,
61df7443 2516 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2517}
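/*
 * Worked example (illustrative): an RMRR covering 0xbf800000-0xbfffffff
 * gives first_vpfn = 0xbf800 and last_vpfn = 0xbffff, so 0x800 pages
 * (8MiB) are reserved in the domain's iovad and mapped 1:1 read/write.
 */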
2518
d66ce54b
JR
2519static int domain_prepare_identity_map(struct device *dev,
2520 struct dmar_domain *domain,
2521 unsigned long long start,
2522 unsigned long long end)
b213203e 2523{
19943b0e
DW
2524 /* For _hardware_ passthrough, don't bother. But for software
2525 passthrough, we do it anyway -- it may indicate a memory
 2526 range which is reserved in E820 and so didn't get set
2527 up to start with in si_domain */
2528 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2529 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2530 dev_name(dev), start, end);
19943b0e
DW
2531 return 0;
2532 }
2533
9f10e5bf
JR
2534 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2535 dev_name(dev), start, end);
2536
5595b528
DW
2537 if (end < start) {
2538 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2539 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2540 dmi_get_system_info(DMI_BIOS_VENDOR),
2541 dmi_get_system_info(DMI_BIOS_VERSION),
2542 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2543 return -EIO;
5595b528
DW
2544 }
2545
2ff729f5
DW
2546 if (end >> agaw_to_width(domain->agaw)) {
2547 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2548 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2549 agaw_to_width(domain->agaw),
2550 dmi_get_system_info(DMI_BIOS_VENDOR),
2551 dmi_get_system_info(DMI_BIOS_VERSION),
2552 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2553 return -EIO;
2ff729f5 2554 }
19943b0e 2555
d66ce54b
JR
2556 return iommu_domain_identity_map(domain, start, end);
2557}
ba395927 2558
d66ce54b
JR
2559static int iommu_prepare_identity_map(struct device *dev,
2560 unsigned long long start,
2561 unsigned long long end)
2562{
2563 struct dmar_domain *domain;
2564 int ret;
2565
2566 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2567 if (!domain)
2568 return -ENOMEM;
2569
2570 ret = domain_prepare_identity_map(dev, domain, start, end);
2571 if (ret)
2572 domain_exit(domain);
b213203e 2573
ba395927 2574 return ret;
ba395927
KA
2575}
2576
2577static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2578 struct device *dev)
ba395927 2579{
0b9d9753 2580 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2581 return 0;
0b9d9753
DW
2582 return iommu_prepare_identity_map(dev, rmrr->base_address,
2583 rmrr->end_address);
ba395927
KA
2584}
2585
d3f13810 2586#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2587static inline void iommu_prepare_isa(void)
2588{
2589 struct pci_dev *pdev;
2590 int ret;
2591
2592 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2593 if (!pdev)
2594 return;
2595
9f10e5bf 2596 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2597 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2598
2599 if (ret)
9f10e5bf 2600 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2601
9b27e82d 2602 pci_dev_put(pdev);
49a0429e
KA
2603}
2604#else
2605static inline void iommu_prepare_isa(void)
2606{
2607 return;
2608}
d3f13810 2609 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2610
2c2e2c38 2611static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2612
071e1374 2613static int __init si_domain_init(int hw)
2c2e2c38 2614{
c7ab48d2 2615 int nid, ret = 0;
2c2e2c38 2616
ab8dfe25 2617 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2618 if (!si_domain)
2619 return -EFAULT;
2620
2c2e2c38
FY
2621 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2622 domain_exit(si_domain);
2623 return -EFAULT;
2624 }
2625
0dc79715 2626 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2627
19943b0e
DW
2628 if (hw)
2629 return 0;
2630
c7ab48d2 2631 for_each_online_node(nid) {
5dfe8660
TH
2632 unsigned long start_pfn, end_pfn;
2633 int i;
2634
2635 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2636 ret = iommu_domain_identity_map(si_domain,
2637 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2638 if (ret)
2639 return ret;
2640 }
c7ab48d2
DW
2641 }
2642
2c2e2c38
FY
2643 return 0;
2644}
2645
9b226624 2646static int identity_mapping(struct device *dev)
2c2e2c38
FY
2647{
2648 struct device_domain_info *info;
2649
2650 if (likely(!iommu_identity_mapping))
2651 return 0;
2652
9b226624 2653 info = dev->archdata.iommu;
cb452a40
MT
2654 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2655 return (info->domain == si_domain);
2c2e2c38 2656
2c2e2c38
FY
2657 return 0;
2658}
2659
28ccce0d 2660static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2661{
0ac72664 2662 struct dmar_domain *ndomain;
5a8f40e8 2663 struct intel_iommu *iommu;
156baca8 2664 u8 bus, devfn;
2c2e2c38 2665
5913c9bf 2666 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2667 if (!iommu)
2668 return -ENODEV;
2669
5db31569 2670 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2671 if (ndomain != domain)
2672 return -EBUSY;
2c2e2c38
FY
2673
2674 return 0;
2675}
2676
0b9d9753 2677static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2678{
2679 struct dmar_rmrr_unit *rmrr;
832bd858 2680 struct device *tmp;
ea2447f7
TM
2681 int i;
2682
0e242612 2683 rcu_read_lock();
ea2447f7 2684 for_each_rmrr_units(rmrr) {
b683b230
JL
2685 /*
2686 * Return TRUE if this RMRR contains the device that
2687 * is passed in.
2688 */
2689 for_each_active_dev_scope(rmrr->devices,
2690 rmrr->devices_cnt, i, tmp)
0b9d9753 2691 if (tmp == dev) {
0e242612 2692 rcu_read_unlock();
ea2447f7 2693 return true;
b683b230 2694 }
ea2447f7 2695 }
0e242612 2696 rcu_read_unlock();
ea2447f7
TM
2697 return false;
2698}
2699
c875d2c1
AW
2700/*
2701 * There are a couple cases where we need to restrict the functionality of
2702 * devices associated with RMRRs. The first is when evaluating a device for
2703 * identity mapping because problems exist when devices are moved in and out
2704 * of domains and their respective RMRR information is lost. This means that
2705 * a device with associated RMRRs will never be in a "passthrough" domain.
2706 * The second is use of the device through the IOMMU API. This interface
2707 * expects to have full control of the IOVA space for the device. We cannot
2708 * satisfy both the requirement that RMRR access is maintained and have an
2709 * unencumbered IOVA space. We also have no ability to quiesce the device's
2710 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2711 * We therefore prevent devices associated with an RMRR from participating in
2712 * the IOMMU API, which eliminates them from device assignment.
2713 *
2714 * In both cases we assume that PCI USB devices with RMRRs have them largely
2715 * for historical reasons and that the RMRR space is not actively used post
2716 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2717 *
2718 * The same exception is made for graphics devices, with the requirement that
2719 * any use of the RMRR regions will be torn down before assigning the device
2720 * to a guest.
c875d2c1
AW
2721 */
2722static bool device_is_rmrr_locked(struct device *dev)
2723{
2724 if (!device_has_rmrr(dev))
2725 return false;
2726
2727 if (dev_is_pci(dev)) {
2728 struct pci_dev *pdev = to_pci_dev(dev);
2729
18436afd 2730 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2731 return false;
2732 }
2733
2734 return true;
2735}
2736
3bdb2591 2737static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2738{
ea2447f7 2739
3bdb2591
DW
2740 if (dev_is_pci(dev)) {
2741 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2742
c875d2c1 2743 if (device_is_rmrr_locked(dev))
3bdb2591 2744 return 0;
e0fc7e0b 2745
3bdb2591
DW
2746 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2747 return 1;
e0fc7e0b 2748
3bdb2591
DW
2749 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2750 return 1;
6941af28 2751
3bdb2591 2752 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2753 return 0;
3bdb2591
DW
2754
2755 /*
2756 * We want to start off with all devices in the 1:1 domain, and
2757 * take them out later if we find they can't access all of memory.
2758 *
2759 * However, we can't do this for PCI devices behind bridges,
2760 * because all PCI devices behind the same bridge will end up
2761 * with the same source-id on their transactions.
2762 *
2763 * Practically speaking, we can't change things around for these
2764 * devices at run-time, because we can't be sure there'll be no
2765 * DMA transactions in flight for any of their siblings.
2766 *
2767 * So PCI devices (unless they're on the root bus) as well as
2768 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2769 * the 1:1 domain, just in _case_ one of their siblings turns out
2770 * not to be able to map all of memory.
2771 */
2772 if (!pci_is_pcie(pdev)) {
2773 if (!pci_is_root_bus(pdev->bus))
2774 return 0;
2775 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2776 return 0;
2777 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2778 return 0;
3bdb2591
DW
2779 } else {
2780 if (device_has_rmrr(dev))
2781 return 0;
2782 }
3dfc813d 2783
3bdb2591 2784 /*
3dfc813d 2785 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2786 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2787 * take them out of the 1:1 domain later.
2788 */
8fcc5372
CW
2789 if (!startup) {
2790 /*
2791 * If the device's dma_mask is less than the system's memory
2792 * size then this is not a candidate for identity mapping.
2793 */
3bdb2591 2794 u64 dma_mask = *dev->dma_mask;
8fcc5372 2795
3bdb2591
DW
2796 if (dev->coherent_dma_mask &&
2797 dev->coherent_dma_mask < dma_mask)
2798 dma_mask = dev->coherent_dma_mask;
8fcc5372 2799
3bdb2591 2800 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2801 }
6941af28
DW
2802
2803 return 1;
2804}
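/*
 * Worked example (illustrative): on a box with 8GiB of RAM,
 * dma_get_required_mask() is roughly DMA_BIT_MASK(33).  A device whose
 * dma_mask is only DMA_BIT_MASK(32) then fails the check above at run
 * time and is later moved out of the identity (1:1) domain, while a
 * 64-bit capable device stays in it.
 */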
2805
cf04eee8
DW
2806static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2807{
2808 int ret;
2809
2810 if (!iommu_should_identity_map(dev, 1))
2811 return 0;
2812
28ccce0d 2813 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2814 if (!ret)
9f10e5bf
JR
2815 pr_info("%s identity mapping for device %s\n",
2816 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2817 else if (ret == -ENODEV)
2818 /* device not associated with an iommu */
2819 ret = 0;
2820
2821 return ret;
2822}
2823
2824
071e1374 2825static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2826{
2c2e2c38 2827 struct pci_dev *pdev = NULL;
cf04eee8
DW
2828 struct dmar_drhd_unit *drhd;
2829 struct intel_iommu *iommu;
2830 struct device *dev;
2831 int i;
2832 int ret = 0;
2c2e2c38 2833
2c2e2c38 2834 for_each_pci_dev(pdev) {
cf04eee8
DW
2835 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2836 if (ret)
2837 return ret;
2838 }
2839
2840 for_each_active_iommu(iommu, drhd)
2841 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2842 struct acpi_device_physical_node *pn;
2843 struct acpi_device *adev;
2844
2845 if (dev->bus != &acpi_bus_type)
2846 continue;
86080ccc 2847
cf04eee8
DW
2848 adev= to_acpi_device(dev);
2849 mutex_lock(&adev->physical_node_lock);
2850 list_for_each_entry(pn, &adev->physical_node_list, node) {
2851 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2852 if (ret)
2853 break;
eae460b6 2854 }
cf04eee8
DW
2855 mutex_unlock(&adev->physical_node_lock);
2856 if (ret)
2857 return ret;
62edf5dc 2858 }
2c2e2c38
FY
2859
2860 return 0;
2861}
2862
ffebeb46
JL
2863static void intel_iommu_init_qi(struct intel_iommu *iommu)
2864{
2865 /*
2866 * Start from the sane iommu hardware state.
2867 * If the queued invalidation is already initialized by us
2868 * (for example, while enabling interrupt-remapping) then
2869 * we got the things already rolling from a sane state.
2870 */
2871 if (!iommu->qi) {
2872 /*
2873 * Clear any previous faults.
2874 */
2875 dmar_fault(-1, iommu);
2876 /*
2877 * Disable queued invalidation if supported and already enabled
2878 * before OS handover.
2879 */
2880 dmar_disable_qi(iommu);
2881 }
2882
2883 if (dmar_enable_qi(iommu)) {
2884 /*
2885 * Queued Invalidate not enabled, use Register Based Invalidate
2886 */
2887 iommu->flush.flush_context = __iommu_flush_context;
2888 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2889 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2890 iommu->name);
2891 } else {
2892 iommu->flush.flush_context = qi_flush_context;
2893 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2894 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2895 }
2896}
2897
091d42e4 2898static int copy_context_table(struct intel_iommu *iommu,
dfddb969 2899 struct root_entry *old_re,
091d42e4
JR
2900 struct context_entry **tbl,
2901 int bus, bool ext)
2902{
dbcd861f 2903 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 2904 struct context_entry *new_ce = NULL, ce;
dfddb969 2905 struct context_entry *old_ce = NULL;
543c8dcf 2906 struct root_entry re;
091d42e4
JR
2907 phys_addr_t old_ce_phys;
2908
2909 tbl_idx = ext ? bus * 2 : bus;
dfddb969 2910 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
2911
2912 for (devfn = 0; devfn < 256; devfn++) {
2913 /* First calculate the correct index */
2914 idx = (ext ? devfn * 2 : devfn) % 256;
2915
2916 if (idx == 0) {
2917 /* First save what we may have and clean up */
2918 if (new_ce) {
2919 tbl[tbl_idx] = new_ce;
2920 __iommu_flush_cache(iommu, new_ce,
2921 VTD_PAGE_SIZE);
2922 pos = 1;
2923 }
2924
2925 if (old_ce)
2926 iounmap(old_ce);
2927
2928 ret = 0;
2929 if (devfn < 0x80)
543c8dcf 2930 old_ce_phys = root_entry_lctp(&re);
091d42e4 2931 else
543c8dcf 2932 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
2933
2934 if (!old_ce_phys) {
2935 if (ext && devfn == 0) {
2936 /* No LCTP, try UCTP */
2937 devfn = 0x7f;
2938 continue;
2939 } else {
2940 goto out;
2941 }
2942 }
2943
2944 ret = -ENOMEM;
dfddb969
DW
2945 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2946 MEMREMAP_WB);
091d42e4
JR
2947 if (!old_ce)
2948 goto out;
2949
2950 new_ce = alloc_pgtable_page(iommu->node);
2951 if (!new_ce)
2952 goto out_unmap;
2953
2954 ret = 0;
2955 }
2956
2957 /* Now copy the context entry */
dfddb969 2958 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 2959
cf484d0e 2960 if (!__context_present(&ce))
091d42e4
JR
2961 continue;
2962
dbcd861f
JR
2963 did = context_domain_id(&ce);
2964 if (did >= 0 && did < cap_ndoms(iommu->cap))
2965 set_bit(did, iommu->domain_ids);
2966
cf484d0e
JR
2967 /*
2968 * We need a marker for copied context entries. This
2969 * marker needs to work for the old format as well as
2970 * for extended context entries.
2971 *
2972 * Bit 67 of the context entry is used. In the old
2973 * format this bit is available to software, in the
2974 * extended format it is the PGE bit, but PGE is ignored
2975 * by HW if PASIDs are disabled (and thus still
2976 * available).
2977 *
2978 * So disable PASIDs first and then mark the entry
2979 * copied. This means that we don't copy PASID
2980 * translations from the old kernel, but this is fine as
2981 * faults there are not fatal.
2982 */
2983 context_clear_pasid_enable(&ce);
2984 context_set_copied(&ce);
2985
091d42e4
JR
2986 new_ce[idx] = ce;
2987 }
2988
2989 tbl[tbl_idx + pos] = new_ce;
2990
2991 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2992
2993out_unmap:
dfddb969 2994 memunmap(old_ce);
091d42e4
JR
2995
2996out:
2997 return ret;
2998}
2999
3000static int copy_translation_tables(struct intel_iommu *iommu)
3001{
3002 struct context_entry **ctxt_tbls;
dfddb969 3003 struct root_entry *old_rt;
091d42e4
JR
3004 phys_addr_t old_rt_phys;
3005 int ctxt_table_entries;
3006 unsigned long flags;
3007 u64 rtaddr_reg;
3008 int bus, ret;
c3361f2f 3009 bool new_ext, ext;
091d42e4
JR
3010
3011 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3012 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3013 new_ext = !!ecap_ecs(iommu->ecap);
3014
3015 /*
3016 * The RTT bit can only be changed when translation is disabled,
3017 * but disabling translation means to open a window for data
3018 * corruption. So bail out and don't copy anything if we would
3019 * have to change the bit.
3020 */
3021 if (new_ext != ext)
3022 return -EINVAL;
091d42e4
JR
3023
3024 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3025 if (!old_rt_phys)
3026 return -EINVAL;
3027
dfddb969 3028 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3029 if (!old_rt)
3030 return -ENOMEM;
3031
3032 /* This is too big for the stack - allocate it from slab */
3033 ctxt_table_entries = ext ? 512 : 256;
3034 ret = -ENOMEM;
3035 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3036 if (!ctxt_tbls)
3037 goto out_unmap;
3038
3039 for (bus = 0; bus < 256; bus++) {
3040 ret = copy_context_table(iommu, &old_rt[bus],
3041 ctxt_tbls, bus, ext);
3042 if (ret) {
3043 pr_err("%s: Failed to copy context table for bus %d\n",
3044 iommu->name, bus);
3045 continue;
3046 }
3047 }
3048
3049 spin_lock_irqsave(&iommu->lock, flags);
3050
3051 /* Context tables are copied, now write them to the root_entry table */
3052 for (bus = 0; bus < 256; bus++) {
3053 int idx = ext ? bus * 2 : bus;
3054 u64 val;
3055
3056 if (ctxt_tbls[idx]) {
3057 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3058 iommu->root_entry[bus].lo = val;
3059 }
3060
3061 if (!ext || !ctxt_tbls[idx + 1])
3062 continue;
3063
3064 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3065 iommu->root_entry[bus].hi = val;
3066 }
3067
3068 spin_unlock_irqrestore(&iommu->lock, flags);
3069
3070 kfree(ctxt_tbls);
3071
3072 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3073
3074 ret = 0;
3075
3076out_unmap:
dfddb969 3077 memunmap(old_rt);
091d42e4
JR
3078
3079 return ret;
3080}
3081
b779260b 3082static int __init init_dmars(void)
ba395927
KA
3083{
3084 struct dmar_drhd_unit *drhd;
3085 struct dmar_rmrr_unit *rmrr;
a87f4918 3086 bool copied_tables = false;
832bd858 3087 struct device *dev;
ba395927 3088 struct intel_iommu *iommu;
aa473240 3089 int i, ret, cpu;
2c2e2c38 3090
ba395927
KA
3091 /*
3092 * for each drhd
3093 * allocate root
3094 * initialize and program root entry to not present
3095 * endfor
3096 */
3097 for_each_drhd_unit(drhd) {
5e0d2a6f 3098 /*
 3099 * lock not needed as this is only incremented in the single-
 3100 * threaded kernel __init code path; all other accesses are
 3101 * read only
3102 */
78d8e704 3103 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3104 g_num_of_iommus++;
3105 continue;
3106 }
9f10e5bf 3107 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3108 }
3109
ffebeb46
JL
3110 /* Preallocate enough resources for IOMMU hot-addition */
3111 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3112 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3113
d9630fe9
WH
3114 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3115 GFP_KERNEL);
3116 if (!g_iommus) {
9f10e5bf 3117 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3118 ret = -ENOMEM;
3119 goto error;
3120 }
3121
aa473240
OP
3122 for_each_possible_cpu(cpu) {
3123 struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
3124 cpu);
3125
3126 dfd->tables = kzalloc(g_num_of_iommus *
3127 sizeof(struct deferred_flush_table),
3128 GFP_KERNEL);
3129 if (!dfd->tables) {
3130 ret = -ENOMEM;
3131 goto free_g_iommus;
3132 }
3133
3134 spin_lock_init(&dfd->lock);
3135 setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
5e0d2a6f 3136 }
3137
7c919779 3138 for_each_active_iommu(iommu, drhd) {
d9630fe9 3139 g_iommus[iommu->seq_id] = iommu;
ba395927 3140
b63d80d1
JR
3141 intel_iommu_init_qi(iommu);
3142
e61d98d8
SS
3143 ret = iommu_init_domains(iommu);
3144 if (ret)
989d51fc 3145 goto free_iommu;
e61d98d8 3146
4158c2ec
JR
3147 init_translation_status(iommu);
3148
091d42e4
JR
3149 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3150 iommu_disable_translation(iommu);
3151 clear_translation_pre_enabled(iommu);
3152 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3153 iommu->name);
3154 }
4158c2ec 3155
ba395927
KA
3156 /*
3157 * TBD:
3158 * we could share the same root & context tables
25985edc 3159 * among all IOMMU's. Need to Split it later.
ba395927
KA
3160 */
3161 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3162 if (ret)
989d51fc 3163 goto free_iommu;
5f0a7f76 3164
091d42e4
JR
3165 if (translation_pre_enabled(iommu)) {
3166 pr_info("Translation already enabled - trying to copy translation structures\n");
3167
3168 ret = copy_translation_tables(iommu);
3169 if (ret) {
3170 /*
3171 * We found the IOMMU with translation
3172 * enabled - but failed to copy over the
3173 * old root-entry table. Try to proceed
3174 * by disabling translation now and
3175 * allocating a clean root-entry table.
3176 * This might cause DMAR faults, but
3177 * probably the dump will still succeed.
3178 */
3179 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3180 iommu->name);
3181 iommu_disable_translation(iommu);
3182 clear_translation_pre_enabled(iommu);
3183 } else {
3184 pr_info("Copied translation tables from previous kernel for %s\n",
3185 iommu->name);
a87f4918 3186 copied_tables = true;
091d42e4
JR
3187 }
3188 }
3189
5f0a7f76
JR
3190 iommu_flush_write_buffer(iommu);
3191 iommu_set_root_entry(iommu);
3192 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3193 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3194
4ed0d3e6 3195 if (!ecap_pass_through(iommu->ecap))
19943b0e 3196 hw_pass_through = 0;
8a94ade4
DW
3197#ifdef CONFIG_INTEL_IOMMU_SVM
3198 if (pasid_enabled(iommu))
3199 intel_svm_alloc_pasid_tables(iommu);
3200#endif
ba395927
KA
3201 }
3202
19943b0e 3203 if (iommu_pass_through)
e0fc7e0b
DW
3204 iommu_identity_mapping |= IDENTMAP_ALL;
3205
d3f13810 3206#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3207 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3208#endif
e0fc7e0b 3209
86080ccc
JR
3210 if (iommu_identity_mapping) {
3211 ret = si_domain_init(hw_pass_through);
3212 if (ret)
3213 goto free_iommu;
3214 }
3215
e0fc7e0b
DW
3216 check_tylersburg_isoch();
3217
a87f4918
JR
3218 /*
3219 * If we copied translations from a previous kernel in the kdump
3220 * case, we can not assign the devices to domains now, as that
3221 * would eliminate the old mappings. So skip this part and defer
3222 * the assignment to device driver initialization time.
3223 */
3224 if (copied_tables)
3225 goto domains_done;
3226
ba395927 3227 /*
19943b0e
DW
 3228 * If pass through is not set or not enabled, set up context entries for
 3229 * identity mappings for RMRR, GFX and ISA, and fall back to static
 3230 * identity mapping if iommu_identity_mapping is set.
ba395927 3231 */
19943b0e
DW
3232 if (iommu_identity_mapping) {
3233 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3234 if (ret) {
9f10e5bf 3235 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3236 goto free_iommu;
ba395927
KA
3237 }
3238 }
ba395927 3239 /*
19943b0e
DW
3240 * For each rmrr
3241 * for each dev attached to rmrr
3242 * do
3243 * locate drhd for dev, alloc domain for dev
3244 * allocate free domain
3245 * allocate page table entries for rmrr
3246 * if context not allocated for bus
3247 * allocate and init context
3248 * set present in root table for this bus
3249 * init context with domain, translation etc
3250 * endfor
3251 * endfor
ba395927 3252 */
9f10e5bf 3253 pr_info("Setting RMRR:\n");
19943b0e 3254 for_each_rmrr_units(rmrr) {
b683b230
JL
3255 /* some BIOS lists non-exist devices in DMAR table. */
3256 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3257 i, dev) {
0b9d9753 3258 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3259 if (ret)
9f10e5bf 3260 pr_err("Mapping reserved region failed\n");
ba395927 3261 }
4ed0d3e6 3262 }
49a0429e 3263
19943b0e
DW
3264 iommu_prepare_isa();
3265
a87f4918
JR
3266domains_done:
3267
ba395927
KA
3268 /*
3269 * for each drhd
3270 * enable fault log
3271 * global invalidate context cache
3272 * global invalidate iotlb
3273 * enable translation
3274 */
7c919779 3275 for_each_iommu(iommu, drhd) {
51a63e67
JC
3276 if (drhd->ignored) {
3277 /*
3278 * we always have to disable PMRs or DMA may fail on
3279 * this device
3280 */
3281 if (force_on)
7c919779 3282 iommu_disable_protect_mem_regions(iommu);
ba395927 3283 continue;
51a63e67 3284 }
ba395927
KA
3285
3286 iommu_flush_write_buffer(iommu);
3287
a222a7f0
DW
3288#ifdef CONFIG_INTEL_IOMMU_SVM
3289 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3290 ret = intel_svm_enable_prq(iommu);
3291 if (ret)
3292 goto free_iommu;
3293 }
3294#endif
3460a6d9
KA
3295 ret = dmar_set_interrupt(iommu);
3296 if (ret)
989d51fc 3297 goto free_iommu;
3460a6d9 3298
8939ddf6
JR
3299 if (!translation_pre_enabled(iommu))
3300 iommu_enable_translation(iommu);
3301
b94996c9 3302 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3303 }
3304
3305 return 0;
989d51fc
JL
3306
3307free_iommu:
ffebeb46
JL
3308 for_each_active_iommu(iommu, drhd) {
3309 disable_dmar_iommu(iommu);
a868e6b7 3310 free_dmar_iommu(iommu);
ffebeb46 3311 }
989d51fc 3312free_g_iommus:
aa473240
OP
3313 for_each_possible_cpu(cpu)
3314 kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
d9630fe9 3315 kfree(g_iommus);
989d51fc 3316error:
ba395927
KA
3317 return ret;
3318}
3319
5a5e02a6 3320/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
3321static struct iova *intel_alloc_iova(struct device *dev,
3322 struct dmar_domain *domain,
3323 unsigned long nrpages, uint64_t dma_mask)
ba395927 3324{
ba395927 3325 struct iova *iova = NULL;
ba395927 3326
875764de
DW
3327 /* Restrict dma_mask to the width that the iommu can handle */
3328 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3329 /* Ensure we reserve the whole size-aligned region */
3330 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3331
3332 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3333 /*
3334 * First try to allocate an io virtual address in
284901a9 3335 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3336 * from a higher range
ba395927 3337 */
875764de
DW
3338 iova = alloc_iova(&domain->iovad, nrpages,
3339 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3340 if (iova)
3341 return iova;
3342 }
3343 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3344 if (unlikely(!iova)) {
9f10e5bf 3345 pr_err("Allocating %ld-page iova for %s failed\n",
207e3592 3346 nrpages, dev_name(dev));
f76aec76
KA
3347 return NULL;
3348 }
3349
3350 return iova;
3351}
3352
d4b709f4 3353static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76 3354{
b1ce5b79 3355 struct dmar_rmrr_unit *rmrr;
f76aec76 3356 struct dmar_domain *domain;
b1ce5b79
JR
3357 struct device *i_dev;
3358 int i, ret;
f76aec76 3359
d4b709f4 3360 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 3361 if (!domain) {
9f10e5bf 3362 pr_err("Allocating domain for %s failed\n",
d4b709f4 3363 dev_name(dev));
4fe05bbc 3364 return NULL;
ba395927
KA
3365 }
3366
b1ce5b79
JR
3367 /* We have a new domain - setup possible RMRRs for the device */
3368 rcu_read_lock();
3369 for_each_rmrr_units(rmrr) {
3370 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3371 i, i_dev) {
3372 if (i_dev != dev)
3373 continue;
3374
3375 ret = domain_prepare_identity_map(dev, domain,
3376 rmrr->base_address,
3377 rmrr->end_address);
3378 if (ret)
3379 dev_err(dev, "Mapping reserved region failed\n");
3380 }
3381 }
3382 rcu_read_unlock();
3383
f76aec76
KA
3384 return domain;
3385}
3386
d4b709f4 3387static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
3388{
3389 struct device_domain_info *info;
3390
3391 /* No lock here, assumes no domain exit in normal case */
d4b709f4 3392 info = dev->archdata.iommu;
147202aa
DW
3393 if (likely(info))
3394 return info->domain;
3395
3396 return __get_valid_domain_for_dev(dev);
3397}
3398
ecb509ec 3399/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3400static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3401{
3402 int found;
3403
3d89194a 3404 if (iommu_dummy(dev))
1e4c64c4
DW
3405 return 1;
3406
2c2e2c38 3407 if (!iommu_identity_mapping)
1e4c64c4 3408 return 0;
2c2e2c38 3409
9b226624 3410 found = identity_mapping(dev);
2c2e2c38 3411 if (found) {
ecb509ec 3412 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3413 return 1;
3414 else {
3415 /*
3416 * 32 bit DMA is removed from si_domain and fall back
3417 * to non-identity mapping.
3418 */
e6de0f8d 3419 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3420 pr_info("32bit %s uses non-identity mapping\n",
3421 dev_name(dev));
2c2e2c38
FY
3422 return 0;
3423 }
3424 } else {
3425 /*
3426 * In case of a detached 64 bit DMA device from vm, the device
3427 * is put into si_domain for identity mapping.
3428 */
ecb509ec 3429 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3430 int ret;
28ccce0d 3431 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3432 if (!ret) {
9f10e5bf
JR
3433 pr_info("64bit %s uses identity mapping\n",
3434 dev_name(dev));
2c2e2c38
FY
3435 return 1;
3436 }
3437 }
3438 }
3439
1e4c64c4 3440 return 0;
2c2e2c38
FY
3441}
3442
5040a918 3443static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3444 size_t size, int dir, u64 dma_mask)
f76aec76 3445{
f76aec76 3446 struct dmar_domain *domain;
5b6985ce 3447 phys_addr_t start_paddr;
f76aec76
KA
3448 struct iova *iova;
3449 int prot = 0;
6865f0d1 3450 int ret;
8c11e798 3451 struct intel_iommu *iommu;
33041ec0 3452 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3453
3454 BUG_ON(dir == DMA_NONE);
2c2e2c38 3455
5040a918 3456 if (iommu_no_mapping(dev))
6865f0d1 3457 return paddr;
f76aec76 3458
5040a918 3459 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3460 if (!domain)
3461 return 0;
3462
8c11e798 3463 iommu = domain_get_iommu(domain);
88cb6a74 3464 size = aligned_nrpages(paddr, size);
f76aec76 3465
5040a918 3466 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3467 if (!iova)
3468 goto error;
3469
ba395927
KA
3470 /*
3471 * Check if DMAR supports zero-length reads on write only
3472 * mappings..
3473 */
3474 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3475 !cap_zlr(iommu->cap))
ba395927
KA
3476 prot |= DMA_PTE_READ;
3477 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3478 prot |= DMA_PTE_WRITE;
3479 /*
6865f0d1 3480 * paddr - (paddr + size) might be a partial page; we should map the whole
ba395927 3481 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3482 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
3483 * is not a big problem
3484 */
0ab36de2 3485 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3486 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3487 if (ret)
3488 goto error;
3489
1f0ef2aa
DW
3490 /* it's a non-present to present mapping. Only flush if caching mode */
3491 if (cap_caching_mode(iommu->cap))
a1ddcbe9
JR
3492 iommu_flush_iotlb_psi(iommu, domain,
3493 mm_to_dma_pfn(iova->pfn_lo),
3494 size, 0, 1);
1f0ef2aa 3495 else
8c11e798 3496 iommu_flush_write_buffer(iommu);
f76aec76 3497
03d6a246
DW
3498 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3499 start_paddr += paddr & ~PAGE_MASK;
3500 return start_paddr;
ba395927 3501
ba395927 3502error:
f76aec76
KA
3503 if (iova)
3504 __free_iova(&domain->iovad, iova);
9f10e5bf 3505 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3506 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3507 return 0;
3508}
3509
ffbbef5c
FT
3510static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3511 unsigned long offset, size_t size,
3512 enum dma_data_direction dir,
3513 struct dma_attrs *attrs)
bb9e6d65 3514{
ffbbef5c 3515 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3516 dir, *dev->dma_mask);
bb9e6d65
FT
3517}
3518
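/*
 * Drain one CPU's deferred-unmap state: for each IOMMU with pending
 * entries, perform the required IOTLB (or device-IOTLB) invalidations,
 * then free the IOVAs and any queued page-table freelists.
 */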
aa473240 3519static void flush_unmaps(struct deferred_flush_data *flush_data)
5e0d2a6f 3520{
80b20dd8 3521 int i, j;
5e0d2a6f 3522
aa473240 3523 flush_data->timer_on = 0;
5e0d2a6f 3524
3525 /* just flush them all */
3526 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459 3527 struct intel_iommu *iommu = g_iommus[i];
aa473240
OP
3528 struct deferred_flush_table *flush_table =
3529 &flush_data->tables[i];
a2bb8459
WH
3530 if (!iommu)
3531 continue;
c42d9f32 3532
aa473240 3533 if (!flush_table->next)
9dd2fe89
YZ
3534 continue;
3535
78d5f0f5
NA
3536 /* In caching mode, global flushes turn emulation expensive */
3537 if (!cap_caching_mode(iommu->cap))
3538 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3539 DMA_TLB_GLOBAL_FLUSH);
aa473240 3540 for (j = 0; j < flush_table->next; j++) {
93a23a72 3541 unsigned long mask;
314f1dc1 3542 struct deferred_flush_entry *entry =
aa473240 3543 &flush_table->entries[j];
314f1dc1
OP
3544 struct iova *iova = entry->iova;
3545 struct dmar_domain *domain = entry->domain;
3546 struct page *freelist = entry->freelist;
78d5f0f5
NA
3547
3548 /* On real hardware multiple invalidations are expensive */
3549 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3550 iommu_flush_iotlb_psi(iommu, domain,
f5c0c08b
OP
3551 mm_to_dma_pfn(iova->pfn_lo),
3552 mm_to_dma_pfn(iova_size(iova)),
314f1dc1 3553 !freelist, 0);
78d5f0f5 3554 else {
a156ef99 3555 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
314f1dc1 3556 iommu_flush_dev_iotlb(domain,
78d5f0f5
NA
3557 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3558 }
314f1dc1
OP
3559 __free_iova(&domain->iovad, iova);
3560 if (freelist)
3561 dma_free_pagelist(freelist);
80b20dd8 3562 }
aa473240 3563 flush_table->next = 0;
5e0d2a6f 3564 }
3565
aa473240 3566 flush_data->size = 0;
5e0d2a6f 3567}
3568
aa473240 3569static void flush_unmaps_timeout(unsigned long cpuid)
5e0d2a6f 3570{
aa473240 3571 struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
80b20dd8 3572 unsigned long flags;
3573
aa473240
OP
3574 spin_lock_irqsave(&flush_data->lock, flags);
3575 flush_unmaps(flush_data);
3576 spin_unlock_irqrestore(&flush_data->lock, flags);
5e0d2a6f 3577}
3578
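/*
 * Queue an IOVA (plus any page-table freelist) on this CPU's deferred
 * flush table instead of invalidating immediately; the 10ms timer armed
 * below, or the HIGH_WATER_MARK check, flushes it later.
 */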
ea8ea460 3579static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3580{
3581 unsigned long flags;
314f1dc1 3582 int entry_id, iommu_id;
8c11e798 3583 struct intel_iommu *iommu;
314f1dc1 3584 struct deferred_flush_entry *entry;
aa473240
OP
3585 struct deferred_flush_data *flush_data;
3586 unsigned int cpuid;
5e0d2a6f 3587
aa473240
OP
3588 cpuid = get_cpu();
3589 flush_data = per_cpu_ptr(&deferred_flush, cpuid);
3590
3591 /* Flush all CPUs' entries to avoid deferring too much. If
3592 * this becomes a bottleneck, can just flush us, and rely on
3593 * flush timer for the rest.
3594 */
3595 if (flush_data->size == HIGH_WATER_MARK) {
3596 int cpu;
3597
3598 for_each_online_cpu(cpu)
3599 flush_unmaps_timeout(cpu);
3600 }
3601
3602 spin_lock_irqsave(&flush_data->lock, flags);
80b20dd8 3603
8c11e798
WH
3604 iommu = domain_get_iommu(dom);
3605 iommu_id = iommu->seq_id;
c42d9f32 3606
aa473240
OP
3607 entry_id = flush_data->tables[iommu_id].next;
3608 ++(flush_data->tables[iommu_id].next);
314f1dc1 3609
aa473240 3610 entry = &flush_data->tables[iommu_id].entries[entry_id];
314f1dc1
OP
3611 entry->domain = dom;
3612 entry->iova = iova;
3613 entry->freelist = freelist;
5e0d2a6f 3614
aa473240
OP
3615 if (!flush_data->timer_on) {
3616 mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
3617 flush_data->timer_on = 1;
5e0d2a6f 3618 }
aa473240
OP
3619 flush_data->size++;
3620 spin_unlock_irqrestore(&flush_data->lock, flags);
3621
3622 put_cpu();
5e0d2a6f 3623}
3624
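/*
 * Common unmap path for intel_unmap_page()/intel_unmap_sg(): in strict
 * mode the IOTLB is invalidated synchronously, otherwise the IOVA is
 * handed to add_unmap() and reclaimed from the deferred flush path.
 */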
d41a4adb 3625static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
ba395927 3626{
f76aec76 3627 struct dmar_domain *domain;
d794dc9b 3628 unsigned long start_pfn, last_pfn;
ba395927 3629 struct iova *iova;
8c11e798 3630 struct intel_iommu *iommu;
ea8ea460 3631 struct page *freelist;
ba395927 3632
73676832 3633 if (iommu_no_mapping(dev))
f76aec76 3634 return;
2c2e2c38 3635
1525a29a 3636 domain = find_domain(dev);
ba395927
KA
3637 BUG_ON(!domain);
3638
8c11e798
WH
3639 iommu = domain_get_iommu(domain);
3640
ba395927 3641 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3642 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3643 (unsigned long long)dev_addr))
ba395927 3644 return;
ba395927 3645
d794dc9b
DW
3646 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3647 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3648
d794dc9b 3649 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3650 dev_name(dev), start_pfn, last_pfn);
ba395927 3651
ea8ea460 3652 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3653
5e0d2a6f 3654 if (intel_iommu_strict) {
a1ddcbe9 3655 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
ea8ea460 3656 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3657 /* free iova */
3658 __free_iova(&domain->iovad, iova);
ea8ea460 3659 dma_free_pagelist(freelist);
5e0d2a6f 3660 } else {
ea8ea460 3661 add_unmap(domain, iova, freelist);
5e0d2a6f 3662 /*
3663		 * queue up the release of the unmap to save the roughly 1/6th of
3664		 * the CPU time otherwise spent in the iotlb flush operation...
3665 */
5e0d2a6f 3666 }
ba395927
KA
3667}
3668
d41a4adb
JL
3669static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3670 size_t size, enum dma_data_direction dir,
3671 struct dma_attrs *attrs)
3672{
3673 intel_unmap(dev, dev_addr);
3674}
3675
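/*
 * Coherent allocation callback. Illustrative driver-side usage (not part
 * of this file):
 *
 *	dma_addr_t dma_handle;
 *	void *cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
 *
 * which reaches this function through the dma_map_ops table below.
 */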
5040a918 3676static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3677 dma_addr_t *dma_handle, gfp_t flags,
3678 struct dma_attrs *attrs)
ba395927 3679{
36746436 3680 struct page *page = NULL;
ba395927
KA
3681 int order;
3682
5b6985ce 3683 size = PAGE_ALIGN(size);
ba395927 3684 order = get_order(size);
e8bb910d 3685
5040a918 3686 if (!iommu_no_mapping(dev))
e8bb910d 3687 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3688 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3689 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3690 flags |= GFP_DMA;
3691 else
3692 flags |= GFP_DMA32;
3693 }
ba395927 3694
d0164adc 3695 if (gfpflags_allow_blocking(flags)) {
36746436
AM
3696 unsigned int count = size >> PAGE_SHIFT;
3697
3698 page = dma_alloc_from_contiguous(dev, count, order);
3699 if (page && iommu_no_mapping(dev) &&
3700 page_to_phys(page) + size > dev->coherent_dma_mask) {
3701 dma_release_from_contiguous(dev, page, count);
3702 page = NULL;
3703 }
3704 }
3705
3706 if (!page)
3707 page = alloc_pages(flags, order);
3708 if (!page)
ba395927 3709 return NULL;
36746436 3710 memset(page_address(page), 0, size);
ba395927 3711
36746436 3712 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3713 DMA_BIDIRECTIONAL,
5040a918 3714 dev->coherent_dma_mask);
ba395927 3715 if (*dma_handle)
36746436
AM
3716 return page_address(page);
3717 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3718 __free_pages(page, order);
3719
ba395927
KA
3720 return NULL;
3721}
3722
5040a918 3723static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3724 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3725{
3726 int order;
36746436 3727 struct page *page = virt_to_page(vaddr);
ba395927 3728
5b6985ce 3729 size = PAGE_ALIGN(size);
ba395927
KA
3730 order = get_order(size);
3731
d41a4adb 3732 intel_unmap(dev, dma_handle);
36746436
AM
3733 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3734 __free_pages(page, order);
ba395927
KA
3735}
3736
5040a918 3737static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3738 int nelems, enum dma_data_direction dir,
3739 struct dma_attrs *attrs)
ba395927 3740{
d41a4adb 3741 intel_unmap(dev, sglist[0].dma_address);
ba395927
KA
3742}
3743
ba395927 3744static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3745 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3746{
3747 int i;
c03ab37c 3748 struct scatterlist *sg;
ba395927 3749
c03ab37c 3750 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3751 BUG_ON(!sg_page(sg));
3e6110fd 3752 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3753 sg->dma_length = sg->length;
ba395927
KA
3754 }
3755 return nelems;
3756}
3757
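/*
 * Scatter-gather mapping: one IOVA range large enough for the whole list
 * is allocated and the elements are mapped contiguously into it. Drivers
 * reach this via dma_map_sg(dev, sglist, nelems, dir).
 */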
5040a918 3758static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3759 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3760{
ba395927 3761 int i;
ba395927 3762 struct dmar_domain *domain;
f76aec76
KA
3763 size_t size = 0;
3764 int prot = 0;
f76aec76
KA
3765 struct iova *iova = NULL;
3766 int ret;
c03ab37c 3767 struct scatterlist *sg;
b536d24d 3768 unsigned long start_vpfn;
8c11e798 3769 struct intel_iommu *iommu;
ba395927
KA
3770
3771 BUG_ON(dir == DMA_NONE);
5040a918
DW
3772 if (iommu_no_mapping(dev))
3773 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3774
5040a918 3775 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3776 if (!domain)
3777 return 0;
3778
8c11e798
WH
3779 iommu = domain_get_iommu(domain);
3780
b536d24d 3781 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3782 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3783
5040a918
DW
3784 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3785 *dev->dma_mask);
f76aec76 3786 if (!iova) {
c03ab37c 3787 sglist->dma_length = 0;
f76aec76
KA
3788 return 0;
3789 }
3790
3791 /*
3792 * Check if DMAR supports zero-length reads on write only
3793	 * mappings.
3794 */
3795 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3796 !cap_zlr(iommu->cap))
f76aec76
KA
3797 prot |= DMA_PTE_READ;
3798 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3799 prot |= DMA_PTE_WRITE;
3800
b536d24d 3801 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3802
f532959b 3803 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3804 if (unlikely(ret)) {
e1605495
DW
3805 dma_pte_free_pagetable(domain, start_vpfn,
3806 start_vpfn + size - 1);
e1605495
DW
3807 __free_iova(&domain->iovad, iova);
3808 return 0;
ba395927
KA
3809 }
3810
1f0ef2aa
DW
3811 /* it's a non-present to present mapping. Only flush if caching mode */
3812 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3813 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3814 else
8c11e798 3815 iommu_flush_write_buffer(iommu);
1f0ef2aa 3816
ba395927
KA
3817 return nelems;
3818}
3819
dfb805e8
FT
3820static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3821{
3822 return !dma_addr;
3823}
3824
160c1d8e 3825struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3826 .alloc = intel_alloc_coherent,
3827 .free = intel_free_coherent,
ba395927
KA
3828 .map_sg = intel_map_sg,
3829 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3830 .map_page = intel_map_page,
3831 .unmap_page = intel_unmap_page,
dfb805e8 3832 .mapping_error = intel_mapping_error,
ba395927
KA
3833};
3834
3835static inline int iommu_domain_cache_init(void)
3836{
3837 int ret = 0;
3838
3839 iommu_domain_cache = kmem_cache_create("iommu_domain",
3840 sizeof(struct dmar_domain),
3841 0,
3842 SLAB_HWCACHE_ALIGN,
3843
3844 NULL);
3845 if (!iommu_domain_cache) {
9f10e5bf 3846 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3847 ret = -ENOMEM;
3848 }
3849
3850 return ret;
3851}
3852
3853static inline int iommu_devinfo_cache_init(void)
3854{
3855 int ret = 0;
3856
3857 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3858 sizeof(struct device_domain_info),
3859 0,
3860 SLAB_HWCACHE_ALIGN,
ba395927
KA
3861 NULL);
3862 if (!iommu_devinfo_cache) {
9f10e5bf 3863 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3864 ret = -ENOMEM;
3865 }
3866
3867 return ret;
3868}
3869
ba395927
KA
3870static int __init iommu_init_mempool(void)
3871{
3872 int ret;
ae1ff3d6 3873 ret = iova_cache_get();
ba395927
KA
3874 if (ret)
3875 return ret;
3876
3877 ret = iommu_domain_cache_init();
3878 if (ret)
3879 goto domain_error;
3880
3881 ret = iommu_devinfo_cache_init();
3882 if (!ret)
3883 return ret;
3884
3885 kmem_cache_destroy(iommu_domain_cache);
3886domain_error:
ae1ff3d6 3887 iova_cache_put();
ba395927
KA
3888
3889 return -ENOMEM;
3890}
3891
3892static void __init iommu_exit_mempool(void)
3893{
3894 kmem_cache_destroy(iommu_devinfo_cache);
3895 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3896 iova_cache_put();
ba395927
KA
3897}
3898
556ab45f
DW
3899static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3900{
3901 struct dmar_drhd_unit *drhd;
3902 u32 vtbar;
3903 int rc;
3904
3905 /* We know that this device on this chipset has its own IOMMU.
3906 * If we find it under a different IOMMU, then the BIOS is lying
3907 * to us. Hope that the IOMMU for this device is actually
3908 * disabled, and it needs no translation...
3909 */
3910 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3911 if (rc) {
3912 /* "can't" happen */
3913 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3914 return;
3915 }
3916 vtbar &= 0xffff0000;
3917
3918	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
3919 drhd = dmar_find_matched_drhd_unit(pdev);
3920 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3921 TAINT_FIRMWARE_WORKAROUND,
3922 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3923 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3924}
3925DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3926
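/*
 * Mark DRHD units that can be skipped: units whose device scope is empty,
 * and (when dmar_map_gfx is clear) units covering only graphics devices,
 * are flagged ->ignored so no translation is set up for them.
 */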
ba395927
KA
3927static void __init init_no_remapping_devices(void)
3928{
3929 struct dmar_drhd_unit *drhd;
832bd858 3930 struct device *dev;
b683b230 3931 int i;
ba395927
KA
3932
3933 for_each_drhd_unit(drhd) {
3934 if (!drhd->include_all) {
b683b230
JL
3935 for_each_active_dev_scope(drhd->devices,
3936 drhd->devices_cnt, i, dev)
3937 break;
832bd858 3938 /* ignore DMAR unit if no devices exist */
ba395927
KA
3939 if (i == drhd->devices_cnt)
3940 drhd->ignored = 1;
3941 }
3942 }
3943
7c919779 3944 for_each_active_drhd_unit(drhd) {
7c919779 3945 if (drhd->include_all)
ba395927
KA
3946 continue;
3947
b683b230
JL
3948 for_each_active_dev_scope(drhd->devices,
3949 drhd->devices_cnt, i, dev)
832bd858 3950 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3951 break;
ba395927
KA
3952 if (i < drhd->devices_cnt)
3953 continue;
3954
c0771df8
DW
3955 /* This IOMMU has *only* gfx devices. Either bypass it or
3956 set the gfx_mapped flag, as appropriate */
3957 if (dmar_map_gfx) {
3958 intel_iommu_gfx_mapped = 1;
3959 } else {
3960 drhd->ignored = 1;
b683b230
JL
3961 for_each_active_dev_scope(drhd->devices,
3962 drhd->devices_cnt, i, dev)
832bd858 3963 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3964 }
3965 }
3966}
3967
f59c7b69
FY
3968#ifdef CONFIG_SUSPEND
3969static int init_iommu_hw(void)
3970{
3971 struct dmar_drhd_unit *drhd;
3972 struct intel_iommu *iommu = NULL;
3973
3974 for_each_active_iommu(iommu, drhd)
3975 if (iommu->qi)
3976 dmar_reenable_qi(iommu);
3977
b779260b
JC
3978 for_each_iommu(iommu, drhd) {
3979 if (drhd->ignored) {
3980 /*
3981 * we always have to disable PMRs or DMA may fail on
3982 * this device
3983 */
3984 if (force_on)
3985 iommu_disable_protect_mem_regions(iommu);
3986 continue;
3987 }
3988
f59c7b69
FY
3989 iommu_flush_write_buffer(iommu);
3990
3991 iommu_set_root_entry(iommu);
3992
3993 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3994 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3995 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3996 iommu_enable_translation(iommu);
b94996c9 3997 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3998 }
3999
4000 return 0;
4001}
4002
4003static void iommu_flush_all(void)
4004{
4005 struct dmar_drhd_unit *drhd;
4006 struct intel_iommu *iommu;
4007
4008 for_each_active_iommu(iommu, drhd) {
4009 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4010 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4011 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4012 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4013 }
4014}
4015
134fac3f 4016static int iommu_suspend(void)
f59c7b69
FY
4017{
4018 struct dmar_drhd_unit *drhd;
4019 struct intel_iommu *iommu = NULL;
4020 unsigned long flag;
4021
4022 for_each_active_iommu(iommu, drhd) {
4023 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4024 GFP_ATOMIC);
4025 if (!iommu->iommu_state)
4026 goto nomem;
4027 }
4028
4029 iommu_flush_all();
4030
4031 for_each_active_iommu(iommu, drhd) {
4032 iommu_disable_translation(iommu);
4033
1f5b3c3f 4034 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4035
4036 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4037 readl(iommu->reg + DMAR_FECTL_REG);
4038 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4039 readl(iommu->reg + DMAR_FEDATA_REG);
4040 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4041 readl(iommu->reg + DMAR_FEADDR_REG);
4042 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4043 readl(iommu->reg + DMAR_FEUADDR_REG);
4044
1f5b3c3f 4045 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4046 }
4047 return 0;
4048
4049nomem:
4050 for_each_active_iommu(iommu, drhd)
4051 kfree(iommu->iommu_state);
4052
4053 return -ENOMEM;
4054}
4055
134fac3f 4056static void iommu_resume(void)
f59c7b69
FY
4057{
4058 struct dmar_drhd_unit *drhd;
4059 struct intel_iommu *iommu = NULL;
4060 unsigned long flag;
4061
4062 if (init_iommu_hw()) {
b779260b
JC
4063 if (force_on)
4064 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4065 else
4066 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4067 return;
f59c7b69
FY
4068 }
4069
4070 for_each_active_iommu(iommu, drhd) {
4071
1f5b3c3f 4072 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4073
4074 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4075 iommu->reg + DMAR_FECTL_REG);
4076 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4077 iommu->reg + DMAR_FEDATA_REG);
4078 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4079 iommu->reg + DMAR_FEADDR_REG);
4080 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4081 iommu->reg + DMAR_FEUADDR_REG);
4082
1f5b3c3f 4083 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4084 }
4085
4086 for_each_active_iommu(iommu, drhd)
4087 kfree(iommu->iommu_state);
f59c7b69
FY
4088}
4089
134fac3f 4090static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4091 .resume = iommu_resume,
4092 .suspend = iommu_suspend,
4093};
4094
134fac3f 4095static void __init init_iommu_pm_ops(void)
f59c7b69 4096{
134fac3f 4097 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4098}
4099
4100#else
99592ba4 4101static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4102#endif /* CONFIG_PM */
4103
318fe7df 4104
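/*
 * Parse one RMRR (Reserved Memory Region Reporting) structure from the
 * DMAR table, recording its address range and device scope so the region
 * can later be identity-mapped for the devices that require it.
 */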
c2a0b538 4105int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4106{
4107 struct acpi_dmar_reserved_memory *rmrr;
4108 struct dmar_rmrr_unit *rmrru;
4109
4110 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4111 if (!rmrru)
4112 return -ENOMEM;
4113
4114 rmrru->hdr = header;
4115 rmrr = (struct acpi_dmar_reserved_memory *)header;
4116 rmrru->base_address = rmrr->base_address;
4117 rmrru->end_address = rmrr->end_address;
2e455289
JL
4118 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4119 ((void *)rmrr) + rmrr->header.length,
4120 &rmrru->devices_cnt);
4121 if (rmrru->devices_cnt && rmrru->devices == NULL) {
4122 kfree(rmrru);
4123 return -ENOMEM;
4124 }
318fe7df 4125
2e455289 4126 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4127
2e455289 4128 return 0;
318fe7df
SS
4129}
4130
6b197249
JL
4131static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4132{
4133 struct dmar_atsr_unit *atsru;
4134 struct acpi_dmar_atsr *tmp;
4135
4136 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4137 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4138 if (atsr->segment != tmp->segment)
4139 continue;
4140 if (atsr->header.length != tmp->header.length)
4141 continue;
4142 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4143 return atsru;
4144 }
4145
4146 return NULL;
4147}
4148
4149int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4150{
4151 struct acpi_dmar_atsr *atsr;
4152 struct dmar_atsr_unit *atsru;
4153
6b197249
JL
4154 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4155 return 0;
4156
318fe7df 4157 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4158 atsru = dmar_find_atsr(atsr);
4159 if (atsru)
4160 return 0;
4161
4162 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4163 if (!atsru)
4164 return -ENOMEM;
4165
6b197249
JL
4166 /*
4167 * If memory is allocated from slab by ACPI _DSM method, we need to
4168 * copy the memory content because the memory buffer will be freed
4169 * on return.
4170 */
4171 atsru->hdr = (void *)(atsru + 1);
4172 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4173 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4174 if (!atsru->include_all) {
4175 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4176 (void *)atsr + atsr->header.length,
4177 &atsru->devices_cnt);
4178 if (atsru->devices_cnt && atsru->devices == NULL) {
4179 kfree(atsru);
4180 return -ENOMEM;
4181 }
4182 }
318fe7df 4183
0e242612 4184 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4185
4186 return 0;
4187}
4188
9bdc531e
JL
4189static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4190{
4191 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4192 kfree(atsru);
4193}
4194
6b197249
JL
4195int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4196{
4197 struct acpi_dmar_atsr *atsr;
4198 struct dmar_atsr_unit *atsru;
4199
4200 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4201 atsru = dmar_find_atsr(atsr);
4202 if (atsru) {
4203 list_del_rcu(&atsru->list);
4204 synchronize_rcu();
4205 intel_iommu_free_atsr(atsru);
4206 }
4207
4208 return 0;
4209}
4210
4211int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4212{
4213 int i;
4214 struct device *dev;
4215 struct acpi_dmar_atsr *atsr;
4216 struct dmar_atsr_unit *atsru;
4217
4218 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4219 atsru = dmar_find_atsr(atsr);
4220 if (!atsru)
4221 return 0;
4222
4223 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4224 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4225 i, dev)
4226 return -EBUSY;
4227
4228 return 0;
4229}
4230
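/*
 * Bring up one hot-added DMAR unit: validate its capabilities against the
 * current configuration, allocate domain IDs and a root entry, then enable
 * queued invalidation, interrupts and translation unless the unit is ignored.
 */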
ffebeb46
JL
4231static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4232{
4233 int sp, ret = 0;
4234 struct intel_iommu *iommu = dmaru->iommu;
4235
4236 if (g_iommus[iommu->seq_id])
4237 return 0;
4238
4239 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4240 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4241 iommu->name);
4242 return -ENXIO;
4243 }
4244 if (!ecap_sc_support(iommu->ecap) &&
4245 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4246 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4247 iommu->name);
4248 return -ENXIO;
4249 }
4250 sp = domain_update_iommu_superpage(iommu) - 1;
4251 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4252 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4253 iommu->name);
4254 return -ENXIO;
4255 }
4256
4257 /*
4258 * Disable translation if already enabled prior to OS handover.
4259 */
4260 if (iommu->gcmd & DMA_GCMD_TE)
4261 iommu_disable_translation(iommu);
4262
4263 g_iommus[iommu->seq_id] = iommu;
4264 ret = iommu_init_domains(iommu);
4265 if (ret == 0)
4266 ret = iommu_alloc_root_entry(iommu);
4267 if (ret)
4268 goto out;
4269
8a94ade4
DW
4270#ifdef CONFIG_INTEL_IOMMU_SVM
4271 if (pasid_enabled(iommu))
4272 intel_svm_alloc_pasid_tables(iommu);
4273#endif
4274
ffebeb46
JL
4275 if (dmaru->ignored) {
4276 /*
4277 * we always have to disable PMRs or DMA may fail on this device
4278 */
4279 if (force_on)
4280 iommu_disable_protect_mem_regions(iommu);
4281 return 0;
4282 }
4283
4284 intel_iommu_init_qi(iommu);
4285 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4286
4287#ifdef CONFIG_INTEL_IOMMU_SVM
4288 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4289 ret = intel_svm_enable_prq(iommu);
4290 if (ret)
4291 goto disable_iommu;
4292 }
4293#endif
ffebeb46
JL
4294 ret = dmar_set_interrupt(iommu);
4295 if (ret)
4296 goto disable_iommu;
4297
4298 iommu_set_root_entry(iommu);
4299 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4300 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4301 iommu_enable_translation(iommu);
4302
ffebeb46
JL
4303 iommu_disable_protect_mem_regions(iommu);
4304 return 0;
4305
4306disable_iommu:
4307 disable_dmar_iommu(iommu);
4308out:
4309 free_dmar_iommu(iommu);
4310 return ret;
4311}
4312
6b197249
JL
4313int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4314{
ffebeb46
JL
4315 int ret = 0;
4316 struct intel_iommu *iommu = dmaru->iommu;
4317
4318 if (!intel_iommu_enabled)
4319 return 0;
4320 if (iommu == NULL)
4321 return -EINVAL;
4322
4323 if (insert) {
4324 ret = intel_iommu_add(dmaru);
4325 } else {
4326 disable_dmar_iommu(iommu);
4327 free_dmar_iommu(iommu);
4328 }
4329
4330 return ret;
6b197249
JL
4331}
4332
9bdc531e
JL
4333static void intel_iommu_free_dmars(void)
4334{
4335 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4336 struct dmar_atsr_unit *atsru, *atsr_n;
4337
4338 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4339 list_del(&rmrru->list);
4340 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4341 kfree(rmrru);
318fe7df
SS
4342 }
4343
9bdc531e
JL
4344 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4345 list_del(&atsru->list);
4346 intel_iommu_free_atsr(atsru);
4347 }
318fe7df
SS
4348}
4349
4350int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4351{
b683b230 4352 int i, ret = 1;
318fe7df 4353 struct pci_bus *bus;
832bd858
DW
4354 struct pci_dev *bridge = NULL;
4355 struct device *tmp;
318fe7df
SS
4356 struct acpi_dmar_atsr *atsr;
4357 struct dmar_atsr_unit *atsru;
4358
4359 dev = pci_physfn(dev);
318fe7df 4360 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4361 bridge = bus->self;
d14053b3
DW
4362 /* If it's an integrated device, allow ATS */
4363 if (!bridge)
4364 return 1;
4365 /* Connected via non-PCIe: no ATS */
4366 if (!pci_is_pcie(bridge) ||
62f87c0e 4367 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4368 return 0;
d14053b3 4369 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4370 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4371 break;
318fe7df
SS
4372 }
4373
0e242612 4374 rcu_read_lock();
b5f82ddf
JL
4375 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4376 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4377 if (atsr->segment != pci_domain_nr(dev->bus))
4378 continue;
4379
b683b230 4380 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4381 if (tmp == &bridge->dev)
b683b230 4382 goto out;
b5f82ddf
JL
4383
4384 if (atsru->include_all)
b683b230 4385 goto out;
b5f82ddf 4386 }
b683b230
JL
4387 ret = 0;
4388out:
0e242612 4389 rcu_read_unlock();
318fe7df 4390
b683b230 4391 return ret;
318fe7df
SS
4392}
4393
59ce0515
JL
4394int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4395{
4396 int ret = 0;
4397 struct dmar_rmrr_unit *rmrru;
4398 struct dmar_atsr_unit *atsru;
4399 struct acpi_dmar_atsr *atsr;
4400 struct acpi_dmar_reserved_memory *rmrr;
4401
4402 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4403 return 0;
4404
4405 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4406 rmrr = container_of(rmrru->hdr,
4407 struct acpi_dmar_reserved_memory, header);
4408 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4409 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4410 ((void *)rmrr) + rmrr->header.length,
4411 rmrr->segment, rmrru->devices,
4412 rmrru->devices_cnt);
27e24950 4413			if (ret < 0)
59ce0515 4414 return ret;
e6a8c9b3 4415 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4416 dmar_remove_dev_scope(info, rmrr->segment,
4417 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4418 }
4419 }
4420
4421 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4422 if (atsru->include_all)
4423 continue;
4424
4425 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4426 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4427 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4428 (void *)atsr + atsr->header.length,
4429 atsr->segment, atsru->devices,
4430 atsru->devices_cnt);
4431 if (ret > 0)
4432 break;
4433			else if (ret < 0)
4434 return ret;
e6a8c9b3 4435 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4436 if (dmar_remove_dev_scope(info, atsr->segment,
4437 atsru->devices, atsru->devices_cnt))
4438 break;
4439 }
4440 }
4441
4442 return 0;
4443}
4444
99dcaded
FY
4445/*
4446 * Here we only respond to a device being unbound from its driver.
4447 *
4448 * A newly added device is not attached to its DMAR domain here yet; that
4449 * happens when the device is first mapped to an iova.
4450 */
4451static int device_notifier(struct notifier_block *nb,
4452 unsigned long action, void *data)
4453{
4454 struct device *dev = data;
99dcaded
FY
4455 struct dmar_domain *domain;
4456
3d89194a 4457 if (iommu_dummy(dev))
44cd613c
DW
4458 return 0;
4459
1196c2fb 4460 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4461 return 0;
4462
1525a29a 4463 domain = find_domain(dev);
99dcaded
FY
4464 if (!domain)
4465 return 0;
4466
e6de0f8d 4467 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4468 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4469 domain_exit(domain);
a97590e5 4470
99dcaded
FY
4471 return 0;
4472}
4473
4474static struct notifier_block device_nb = {
4475 .notifier_call = device_notifier,
4476};
4477
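/*
 * Memory hotplug notifier: extend the si_domain identity map when memory
 * goes online, and tear down the corresponding IOVAs and page tables when
 * it goes offline again.
 */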
75f05569
JL
4478static int intel_iommu_memory_notifier(struct notifier_block *nb,
4479 unsigned long val, void *v)
4480{
4481 struct memory_notify *mhp = v;
4482 unsigned long long start, end;
4483 unsigned long start_vpfn, last_vpfn;
4484
4485 switch (val) {
4486 case MEM_GOING_ONLINE:
4487 start = mhp->start_pfn << PAGE_SHIFT;
4488 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4489 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4490 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4491 start, end);
4492 return NOTIFY_BAD;
4493 }
4494 break;
4495
4496 case MEM_OFFLINE:
4497 case MEM_CANCEL_ONLINE:
4498 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4499 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4500 while (start_vpfn <= last_vpfn) {
4501 struct iova *iova;
4502 struct dmar_drhd_unit *drhd;
4503 struct intel_iommu *iommu;
ea8ea460 4504 struct page *freelist;
75f05569
JL
4505
4506 iova = find_iova(&si_domain->iovad, start_vpfn);
4507 if (iova == NULL) {
9f10e5bf 4508 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4509 start_vpfn);
4510 break;
4511 }
4512
4513 iova = split_and_remove_iova(&si_domain->iovad, iova,
4514 start_vpfn, last_vpfn);
4515 if (iova == NULL) {
9f10e5bf 4516 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4517 start_vpfn, last_vpfn);
4518 return NOTIFY_BAD;
4519 }
4520
ea8ea460
DW
4521 freelist = domain_unmap(si_domain, iova->pfn_lo,
4522 iova->pfn_hi);
4523
75f05569
JL
4524 rcu_read_lock();
4525 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4526 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4527 iova->pfn_lo, iova_size(iova),
ea8ea460 4528 !freelist, 0);
75f05569 4529 rcu_read_unlock();
ea8ea460 4530 dma_free_pagelist(freelist);
75f05569
JL
4531
4532 start_vpfn = iova->pfn_hi + 1;
4533 free_iova_mem(iova);
4534 }
4535 break;
4536 }
4537
4538 return NOTIFY_OK;
4539}
4540
4541static struct notifier_block intel_iommu_memory_nb = {
4542 .notifier_call = intel_iommu_memory_notifier,
4543 .priority = 0
4544};
4545
aa473240
OP
4546static int intel_iommu_cpu_notifier(struct notifier_block *nfb,
4547 unsigned long action, void *v)
4548{
4549 unsigned int cpu = (unsigned long)v;
4550
4551 switch (action) {
4552 case CPU_DEAD:
4553 case CPU_DEAD_FROZEN:
4554 flush_unmaps_timeout(cpu);
4555 break;
4556 }
4557 return NOTIFY_OK;
4558}
4559
4560static struct notifier_block intel_iommu_cpu_nb = {
4561 .notifier_call = intel_iommu_cpu_notifier,
4562};
a5459cfe
AW
4563
4564static ssize_t intel_iommu_show_version(struct device *dev,
4565 struct device_attribute *attr,
4566 char *buf)
4567{
4568 struct intel_iommu *iommu = dev_get_drvdata(dev);
4569 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4570 return sprintf(buf, "%d:%d\n",
4571 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4572}
4573static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4574
4575static ssize_t intel_iommu_show_address(struct device *dev,
4576 struct device_attribute *attr,
4577 char *buf)
4578{
4579 struct intel_iommu *iommu = dev_get_drvdata(dev);
4580 return sprintf(buf, "%llx\n", iommu->reg_phys);
4581}
4582static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4583
4584static ssize_t intel_iommu_show_cap(struct device *dev,
4585 struct device_attribute *attr,
4586 char *buf)
4587{
4588 struct intel_iommu *iommu = dev_get_drvdata(dev);
4589 return sprintf(buf, "%llx\n", iommu->cap);
4590}
4591static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4592
4593static ssize_t intel_iommu_show_ecap(struct device *dev,
4594 struct device_attribute *attr,
4595 char *buf)
4596{
4597 struct intel_iommu *iommu = dev_get_drvdata(dev);
4598 return sprintf(buf, "%llx\n", iommu->ecap);
4599}
4600static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4601
2238c082
AW
4602static ssize_t intel_iommu_show_ndoms(struct device *dev,
4603 struct device_attribute *attr,
4604 char *buf)
4605{
4606 struct intel_iommu *iommu = dev_get_drvdata(dev);
4607 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4608}
4609static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4610
4611static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4612 struct device_attribute *attr,
4613 char *buf)
4614{
4615 struct intel_iommu *iommu = dev_get_drvdata(dev);
4616 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4617 cap_ndoms(iommu->cap)));
4618}
4619static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4620
a5459cfe
AW
4621static struct attribute *intel_iommu_attrs[] = {
4622 &dev_attr_version.attr,
4623 &dev_attr_address.attr,
4624 &dev_attr_cap.attr,
4625 &dev_attr_ecap.attr,
2238c082
AW
4626 &dev_attr_domains_supported.attr,
4627 &dev_attr_domains_used.attr,
a5459cfe
AW
4628 NULL,
4629};
4630
4631static struct attribute_group intel_iommu_group = {
4632 .name = "intel-iommu",
4633 .attrs = intel_iommu_attrs,
4634};
4635
4636const struct attribute_group *intel_iommu_groups[] = {
4637 &intel_iommu_group,
4638 NULL,
4639};
4640
ba395927
KA
4641int __init intel_iommu_init(void)
4642{
9bdc531e 4643 int ret = -ENODEV;
3a93c841 4644 struct dmar_drhd_unit *drhd;
7c919779 4645 struct intel_iommu *iommu;
ba395927 4646
a59b50e9
JC
4647 /* VT-d is required for a TXT/tboot launch, so enforce that */
4648 force_on = tboot_force_iommu();
4649
3a5670e8
JL
4650 if (iommu_init_mempool()) {
4651 if (force_on)
4652 panic("tboot: Failed to initialize iommu memory\n");
4653 return -ENOMEM;
4654 }
4655
4656 down_write(&dmar_global_lock);
a59b50e9
JC
4657 if (dmar_table_init()) {
4658 if (force_on)
4659 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4660 goto out_free_dmar;
a59b50e9 4661 }
ba395927 4662
c2c7286a 4663 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4664 if (force_on)
4665 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4666 goto out_free_dmar;
a59b50e9 4667 }
1886e8a9 4668
75f1cdf1 4669 if (no_iommu || dmar_disabled)
9bdc531e 4670 goto out_free_dmar;
2ae21010 4671
318fe7df 4672 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4673 pr_info("No RMRR found\n");
318fe7df
SS
4674
4675 if (list_empty(&dmar_atsr_units))
9f10e5bf 4676 pr_info("No ATSR found\n");
318fe7df 4677
51a63e67
JC
4678 if (dmar_init_reserved_ranges()) {
4679 if (force_on)
4680 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4681 goto out_free_reserved_range;
51a63e67 4682 }
ba395927
KA
4683
4684 init_no_remapping_devices();
4685
b779260b 4686 ret = init_dmars();
ba395927 4687 if (ret) {
a59b50e9
JC
4688 if (force_on)
4689 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4690 pr_err("Initialization failed\n");
9bdc531e 4691 goto out_free_reserved_range;
ba395927 4692 }
3a5670e8 4693 up_write(&dmar_global_lock);
9f10e5bf 4694 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4695
75f1cdf1
FT
4696#ifdef CONFIG_SWIOTLB
4697 swiotlb = 0;
4698#endif
19943b0e 4699 dma_ops = &intel_dma_ops;
4ed0d3e6 4700
134fac3f 4701 init_iommu_pm_ops();
a8bcbb0d 4702
a5459cfe
AW
4703 for_each_active_iommu(iommu, drhd)
4704 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4705 intel_iommu_groups,
2439d4aa 4706 "%s", iommu->name);
a5459cfe 4707
4236d97d 4708 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4709 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4710 if (si_domain && !hw_pass_through)
4711 register_memory_notifier(&intel_iommu_memory_nb);
aa473240 4712 register_hotcpu_notifier(&intel_iommu_cpu_nb);
99dcaded 4713
8bc1f85c
ED
4714 intel_iommu_enabled = 1;
4715
ba395927 4716 return 0;
9bdc531e
JL
4717
4718out_free_reserved_range:
4719 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4720out_free_dmar:
4721 intel_iommu_free_dmars();
3a5670e8
JL
4722 up_write(&dmar_global_lock);
4723 iommu_exit_mempool();
9bdc531e 4724 return ret;
ba395927 4725}
e820482c 4726
2452d9db 4727static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4728{
4729 struct intel_iommu *iommu = opaque;
4730
2452d9db 4731 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4732 return 0;
4733}
4734
4735/*
4736 * NB - intel-iommu lacks any sort of reference counting for the users of
4737 * dependent devices. If multiple endpoints have intersecting dependent
4738 * devices, unbinding the driver from any one of them will possibly leave
4739 * the others unable to operate.
4740 */
2452d9db 4741static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4742{
0bcb3e28 4743 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4744 return;
4745
2452d9db 4746 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4747}
4748
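/*
 * Detach a device from its domain with device_domain_lock held: disable
 * its device-IOTLB, clear its context entries, unlink the
 * device_domain_info and drop the domain's reference on this IOMMU.
 */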
127c7615 4749static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4750{
c7151a8d
WH
4751 struct intel_iommu *iommu;
4752 unsigned long flags;
c7151a8d 4753
55d94043
JR
4754 assert_spin_locked(&device_domain_lock);
4755
127c7615 4756 if (WARN_ON(!info))
c7151a8d
WH
4757 return;
4758
127c7615 4759 iommu = info->iommu;
c7151a8d 4760
127c7615
JR
4761 if (info->dev) {
4762 iommu_disable_dev_iotlb(info);
4763 domain_context_clear(iommu, info->dev);
4764 }
c7151a8d 4765
b608ac3b 4766 unlink_domain_info(info);
c7151a8d 4767
d160aca5 4768 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4769 domain_detach_iommu(info->domain, iommu);
d160aca5 4770 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4771
127c7615 4772 free_devinfo_mem(info);
c7151a8d 4773}
c7151a8d 4774
55d94043
JR
4775static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4776 struct device *dev)
4777{
127c7615 4778 struct device_domain_info *info;
55d94043 4779 unsigned long flags;
3e7abe25 4780
55d94043 4781 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4782 info = dev->archdata.iommu;
4783 __dmar_remove_one_dev_info(info);
55d94043 4784 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4785}
4786
2c2e2c38 4787static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4788{
4789 int adjust_width;
4790
0fb5fe87
RM
4791 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4792 DMA_32BIT_PFN);
5e98c4b1
WH
4793 domain_reserve_special_ranges(domain);
4794
4795 /* calculate AGAW */
4796 domain->gaw = guest_width;
4797 adjust_width = guestwidth_to_adjustwidth(guest_width);
4798 domain->agaw = width_to_agaw(adjust_width);
4799
5e98c4b1 4800 domain->iommu_coherency = 0;
c5b15255 4801 domain->iommu_snooping = 0;
6dd9a7c7 4802 domain->iommu_superpage = 0;
fe40f1e0 4803 domain->max_addr = 0;
5e98c4b1
WH
4804
4805 /* always allocate the top pgd */
4c923d47 4806 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4807 if (!domain->pgd)
4808 return -ENOMEM;
4809 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4810 return 0;
4811}
4812
00a77deb 4813static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4814{
5d450806 4815 struct dmar_domain *dmar_domain;
00a77deb
JR
4816 struct iommu_domain *domain;
4817
4818 if (type != IOMMU_DOMAIN_UNMANAGED)
4819 return NULL;
38717946 4820
ab8dfe25 4821 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4822 if (!dmar_domain) {
9f10e5bf 4823 pr_err("Can't allocate dmar_domain\n");
00a77deb 4824 return NULL;
38717946 4825 }
2c2e2c38 4826 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4827 pr_err("Domain initialization failed\n");
92d03cc8 4828 domain_exit(dmar_domain);
00a77deb 4829 return NULL;
38717946 4830 }
8140a95d 4831 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4832
00a77deb 4833 domain = &dmar_domain->domain;
8a0e715b
JR
4834 domain->geometry.aperture_start = 0;
4835 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4836 domain->geometry.force_aperture = true;
4837
00a77deb 4838 return domain;
38717946 4839}
38717946 4840
00a77deb 4841static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4842{
00a77deb 4843 domain_exit(to_dmar_domain(domain));
38717946 4844}
38717946 4845
4c5478c9
JR
4846static int intel_iommu_attach_device(struct iommu_domain *domain,
4847 struct device *dev)
38717946 4848{
00a77deb 4849 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4850 struct intel_iommu *iommu;
4851 int addr_width;
156baca8 4852 u8 bus, devfn;
faa3d6f5 4853
c875d2c1
AW
4854 if (device_is_rmrr_locked(dev)) {
4855 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4856 return -EPERM;
4857 }
4858
7207d8f9
DW
4859 /* normally dev is not mapped */
4860 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4861 struct dmar_domain *old_domain;
4862
1525a29a 4863 old_domain = find_domain(dev);
faa3d6f5 4864 if (old_domain) {
d160aca5 4865 rcu_read_lock();
de7e8886 4866 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 4867 rcu_read_unlock();
62c22167
JR
4868
4869 if (!domain_type_is_vm_or_si(old_domain) &&
4870 list_empty(&old_domain->devices))
4871 domain_exit(old_domain);
faa3d6f5
WH
4872 }
4873 }
4874
156baca8 4875 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4876 if (!iommu)
4877 return -ENODEV;
4878
4879 /* check if this iommu agaw is sufficient for max mapped address */
4880 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4881 if (addr_width > cap_mgaw(iommu->cap))
4882 addr_width = cap_mgaw(iommu->cap);
4883
4884 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 4885 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4886 "sufficient for the mapped address (%llx)\n",
a99c47a2 4887 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4888 return -EFAULT;
4889 }
a99c47a2
TL
4890 dmar_domain->gaw = addr_width;
4891
4892 /*
4893 * Knock out extra levels of page tables if necessary
4894 */
4895 while (iommu->agaw < dmar_domain->agaw) {
4896 struct dma_pte *pte;
4897
4898 pte = dmar_domain->pgd;
4899 if (dma_pte_present(pte)) {
25cbff16
SY
4900 dmar_domain->pgd = (struct dma_pte *)
4901 phys_to_virt(dma_pte_addr(pte));
7a661013 4902 free_pgtable_page(pte);
a99c47a2
TL
4903 }
4904 dmar_domain->agaw--;
4905 }
fe40f1e0 4906
28ccce0d 4907 return domain_add_dev_info(dmar_domain, dev);
38717946 4908}
38717946 4909
4c5478c9
JR
4910static void intel_iommu_detach_device(struct iommu_domain *domain,
4911 struct device *dev)
38717946 4912{
e6de0f8d 4913 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 4914}
c7151a8d 4915
b146a1c9
JR
4916static int intel_iommu_map(struct iommu_domain *domain,
4917 unsigned long iova, phys_addr_t hpa,
5009065d 4918 size_t size, int iommu_prot)
faa3d6f5 4919{
00a77deb 4920 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 4921 u64 max_addr;
dde57a21 4922 int prot = 0;
faa3d6f5 4923 int ret;
fe40f1e0 4924
dde57a21
JR
4925 if (iommu_prot & IOMMU_READ)
4926 prot |= DMA_PTE_READ;
4927 if (iommu_prot & IOMMU_WRITE)
4928 prot |= DMA_PTE_WRITE;
9cf06697
SY
4929 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4930 prot |= DMA_PTE_SNP;
dde57a21 4931
163cc52c 4932 max_addr = iova + size;
dde57a21 4933 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4934 u64 end;
4935
4936 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4937 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4938 if (end < max_addr) {
9f10e5bf 4939 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4940 "sufficient for the mapped address (%llx)\n",
8954da1f 4941 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4942 return -EFAULT;
4943 }
dde57a21 4944 dmar_domain->max_addr = max_addr;
fe40f1e0 4945 }
ad051221
DW
4946 /* Round up size to next multiple of PAGE_SIZE, if it and
4947 the low bits of hpa would take us onto the next page */
88cb6a74 4948 size = aligned_nrpages(hpa, size);
ad051221
DW
4949 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4950 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4951 return ret;
38717946 4952}
38717946 4953
5009065d 4954static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4955 unsigned long iova, size_t size)
38717946 4956{
00a77deb 4957 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
4958 struct page *freelist = NULL;
4959 struct intel_iommu *iommu;
4960 unsigned long start_pfn, last_pfn;
4961 unsigned int npages;
42e8c186 4962 int iommu_id, level = 0;
5cf0a76f
DW
4963
4964 /* Cope with horrid API which requires us to unmap more than the
4965 size argument if it happens to be a large-page mapping. */
dc02e46e 4966 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
4967
4968 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4969 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4970
ea8ea460
DW
4971 start_pfn = iova >> VTD_PAGE_SHIFT;
4972 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4973
4974 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4975
4976 npages = last_pfn - start_pfn + 1;
4977
29a27719 4978 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 4979 iommu = g_iommus[iommu_id];
ea8ea460 4980
42e8c186
JR
4981 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
4982 start_pfn, npages, !freelist, 0);
ea8ea460
DW
4983 }
4984
4985 dma_free_pagelist(freelist);
fe40f1e0 4986
163cc52c
DW
4987 if (dmar_domain->max_addr == iova + size)
4988 dmar_domain->max_addr = iova;
b146a1c9 4989
5cf0a76f 4990 return size;
38717946 4991}
38717946 4992
d14d6577 4993static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4994 dma_addr_t iova)
38717946 4995{
00a77deb 4996 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 4997 struct dma_pte *pte;
5cf0a76f 4998 int level = 0;
faa3d6f5 4999 u64 phys = 0;
38717946 5000
5cf0a76f 5001 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5002 if (pte)
faa3d6f5 5003 phys = dma_pte_addr(pte);
38717946 5004
faa3d6f5 5005 return phys;
38717946 5006}
a8bcbb0d 5007
5d587b8d 5008static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5009{
dbb9fd86 5010 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5011 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5012 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5013 return irq_remapping_enabled == 1;
dbb9fd86 5014
5d587b8d 5015 return false;
dbb9fd86
SY
5016}
5017
abdfdde2
AW
5018static int intel_iommu_add_device(struct device *dev)
5019{
a5459cfe 5020 struct intel_iommu *iommu;
abdfdde2 5021 struct iommu_group *group;
156baca8 5022 u8 bus, devfn;
70ae6f0d 5023
a5459cfe
AW
5024 iommu = device_to_iommu(dev, &bus, &devfn);
5025 if (!iommu)
70ae6f0d
AW
5026 return -ENODEV;
5027
a5459cfe 5028 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 5029
e17f9ff4 5030 group = iommu_group_get_for_dev(dev);
783f157b 5031
e17f9ff4
AW
5032 if (IS_ERR(group))
5033 return PTR_ERR(group);
bcb71abe 5034
abdfdde2 5035 iommu_group_put(group);
e17f9ff4 5036 return 0;
abdfdde2 5037}
70ae6f0d 5038
abdfdde2
AW
5039static void intel_iommu_remove_device(struct device *dev)
5040{
a5459cfe
AW
5041 struct intel_iommu *iommu;
5042 u8 bus, devfn;
5043
5044 iommu = device_to_iommu(dev, &bus, &devfn);
5045 if (!iommu)
5046 return;
5047
abdfdde2 5048 iommu_group_remove_device(dev);
a5459cfe
AW
5049
5050 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
5051}
5052
2f26e0a9
DW
5053#ifdef CONFIG_INTEL_IOMMU_SVM
5054int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5055{
5056 struct device_domain_info *info;
5057 struct context_entry *context;
5058 struct dmar_domain *domain;
5059 unsigned long flags;
5060 u64 ctx_lo;
5061 int ret;
5062
5063 domain = get_valid_domain_for_dev(sdev->dev);
5064 if (!domain)
5065 return -EINVAL;
5066
5067 spin_lock_irqsave(&device_domain_lock, flags);
5068 spin_lock(&iommu->lock);
5069
5070 ret = -EINVAL;
5071 info = sdev->dev->archdata.iommu;
5072 if (!info || !info->pasid_supported)
5073 goto out;
5074
5075 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5076 if (WARN_ON(!context))
5077 goto out;
5078
5079 ctx_lo = context[0].lo;
5080
5081 sdev->did = domain->iommu_did[iommu->seq_id];
5082 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5083
5084 if (!(ctx_lo & CONTEXT_PASIDE)) {
5085 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5086 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
5087 wmb();
5088 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5089 * extended to permit requests-with-PASID if the PASIDE bit
5090		 * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5091 * however, the PASIDE bit is ignored and requests-with-PASID
5092 * are unconditionally blocked. Which makes less sense.
5093 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5094 * "guest mode" translation types depending on whether ATS
5095 * is available or not. Annoyingly, we can't use the new
5096 * modes *unless* PASIDE is set. */
5097 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5098 ctx_lo &= ~CONTEXT_TT_MASK;
5099 if (info->ats_supported)
5100 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5101 else
5102 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5103 }
5104 ctx_lo |= CONTEXT_PASIDE;
907fea34
DW
5105 if (iommu->pasid_state_table)
5106 ctx_lo |= CONTEXT_DINVE;
a222a7f0
DW
5107 if (info->pri_supported)
5108 ctx_lo |= CONTEXT_PRS;
2f26e0a9
DW
5109 context[0].lo = ctx_lo;
5110 wmb();
5111 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5112 DMA_CCMD_MASK_NOBIT,
5113 DMA_CCMD_DEVICE_INVL);
5114 }
5115
5116 /* Enable PASID support in the device, if it wasn't already */
5117 if (!info->pasid_enabled)
5118 iommu_enable_dev_iotlb(info);
5119
5120 if (info->ats_enabled) {
5121 sdev->dev_iotlb = 1;
5122 sdev->qdep = info->ats_qdep;
5123 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5124 sdev->qdep = 0;
5125 }
5126 ret = 0;
5127
5128 out:
5129 spin_unlock(&iommu->lock);
5130 spin_unlock_irqrestore(&device_domain_lock, flags);
5131
5132 return ret;
5133}
5134
5135struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5136{
5137 struct intel_iommu *iommu;
5138 u8 bus, devfn;
5139
5140 if (iommu_dummy(dev)) {
5141 dev_warn(dev,
5142 "No IOMMU translation for device; cannot enable SVM\n");
5143 return NULL;
5144 }
5145
5146 iommu = device_to_iommu(dev, &bus, &devfn);
5147	if (!iommu) {
b9997e38 5148 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5149 return NULL;
5150 }
5151
5152 if (!iommu->pasid_table) {
b9997e38 5153 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
2f26e0a9
DW
5154 return NULL;
5155 }
5156
5157 return iommu;
5158}
5159#endif /* CONFIG_INTEL_IOMMU_SVM */
5160
b22f6434 5161static const struct iommu_ops intel_iommu_ops = {
5d587b8d 5162 .capable = intel_iommu_capable,
00a77deb
JR
5163 .domain_alloc = intel_iommu_domain_alloc,
5164 .domain_free = intel_iommu_domain_free,
a8bcbb0d
JR
5165 .attach_dev = intel_iommu_attach_device,
5166 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
5167 .map = intel_iommu_map,
5168 .unmap = intel_iommu_unmap,
315786eb 5169 .map_sg = default_iommu_map_sg,
a8bcbb0d 5170 .iova_to_phys = intel_iommu_iova_to_phys,
abdfdde2
AW
5171 .add_device = intel_iommu_add_device,
5172 .remove_device = intel_iommu_remove_device,
a960fadb 5173 .device_group = pci_device_group,
6d1c56a9 5174 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5175};
9af88143 5176
9452618e
DV
5177static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5178{
5179 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 5180 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5181 dmar_map_gfx = 0;
5182}
5183
5184DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5185DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5186DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5187DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5188DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5189DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5190DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5191
d34d6517 5192static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5193{
5194 /*
5195 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5196 * but needs it. Same seems to hold for the desktop versions.
9af88143 5197 */
9f10e5bf 5198 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
5199 rwbf_quirk = 1;
5200}
5201
5202DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5203DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5204DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5205DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5206DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5207DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5208DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5209
eecfd57f
AJ
5210#define GGC 0x52
5211#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5212#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5213#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5214#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5215#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5216#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5217#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5218#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5219
d34d6517 5220static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5221{
5222 unsigned short ggc;
5223
eecfd57f 5224 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5225 return;
5226
eecfd57f 5227 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5228 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5229 dmar_map_gfx = 0;
6fbcfb3e
DW
5230 } else if (dmar_map_gfx) {
5231 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5232 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5233 intel_iommu_strict = 1;
5234 }
9eecabcb
DW
5235}
5236DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5237DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5238DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5239DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5240
e0fc7e0b
DW
5241/* On Tylersburg chipsets, some BIOSes have been known to enable the
5242 ISOCH DMAR unit for the Azalia sound device, but not give it any
5243 TLB entries, which causes it to deadlock. Check for that. We do
5244 this in a function called from init_dmars(), instead of in a PCI
5245 quirk, because we don't want to print the obnoxious "BIOS broken"
5246 message if VT-d is actually disabled.
5247*/
5248static void __init check_tylersburg_isoch(void)
5249{
5250 struct pci_dev *pdev;
5251 uint32_t vtisochctrl;
5252
5253 /* If there's no Azalia in the system anyway, forget it. */
5254 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5255 if (!pdev)
5256 return;
5257 pci_dev_put(pdev);
5258
5259 /* System Management Registers. Might be hidden, in which case
5260 we can't do the sanity check. But that's OK, because the
5261 known-broken BIOSes _don't_ actually hide it, so far. */
5262 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5263 if (!pdev)
5264 return;
5265
5266 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5267 pci_dev_put(pdev);
5268 return;
5269 }
5270
5271 pci_dev_put(pdev);
5272
5273 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5274 if (vtisochctrl & 1)
5275 return;
5276
5277 /* Drop all bits other than the number of TLB entries */
5278 vtisochctrl &= 0x1c;
5279
5280 /* If we have the recommended number of TLB entries (16), fine. */
5281 if (vtisochctrl == 0x10)
5282 return;
5283
5284 /* Zero TLB entries? You get to ride the short bus to school. */
5285 if (!vtisochctrl) {
5286 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5287 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5288 dmi_get_system_info(DMI_BIOS_VENDOR),
5289 dmi_get_system_info(DMI_BIOS_VERSION),
5290 dmi_get_system_info(DMI_PRODUCT_VERSION));
5291 iommu_identity_mapping |= IDENTMAP_AZALIA;
5292 return;
5293 }
9f10e5bf
JR
5294
5295 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5296 vtisochctrl);
5297}