intel-iommu: move root entry defs from dma_remapping.h
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE VTD_PAGE_SIZE
43 #define CONTEXT_SIZE VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START (0xfee00000)
49 #define IOAPIC_RANGE_END (0xfeefffff)
50 #define IOVA_START_ADDR (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
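/*
 * DOMAIN_MAX_ADDR(gaw) is the highest DMA address a domain with guest
 * address width gaw can translate; with the default 48-bit width this is
 * DOMAIN_MAX_ADDR(48) == 0xffffffffffff.
 */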
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
59
60 /*
61 * 0: Present
62 * 1-11: Reserved
63 * 12-63: Context Ptr (12 - (haw-1))
64 * 64-127: Reserved
65 */
66 struct root_entry {
67 u64 val;
68 u64 rsvd1;
69 };
70 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
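/*
 * ROOT_ENTRY_NR is 256 (a 4KB page of 16-byte entries): one root entry per
 * PCI bus, each pointing to a 256-entry context table indexed by devfn.
 */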
71 static inline bool root_present(struct root_entry *root)
72 {
73 return (root->val & 1);
74 }
75 static inline void set_root_present(struct root_entry *root)
76 {
77 root->val |= 1;
78 }
79 static inline void set_root_value(struct root_entry *root, unsigned long value)
80 {
81 root->val |= value & VTD_PAGE_MASK;
82 }
83
84 static inline struct context_entry *
85 get_context_addr_from_root(struct root_entry *root)
86 {
87 return (struct context_entry *)
88 (root_present(root)?phys_to_virt(
89 root->val & VTD_PAGE_MASK) :
90 NULL);
91 }
92
93 static void flush_unmaps_timeout(unsigned long data);
94
95 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
96
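/*
 * Deferred IOTLB flushing: unmapped IOVAs are queued per IOMMU and released
 * in one batch, either when HIGH_WATER_MARK entries have accumulated or when
 * unmap_timer fires (10ms after the first queued unmap). Booting with
 * intel_iommu=strict bypasses the batching and flushes on every unmap.
 */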
97 #define HIGH_WATER_MARK 250
98 struct deferred_flush_tables {
99 int next;
100 struct iova *iova[HIGH_WATER_MARK];
101 struct dmar_domain *domain[HIGH_WATER_MARK];
102 };
103
104 static struct deferred_flush_tables *deferred_flush;
105
106 /* number of IOMMUs, used to size the deferred_flush table */
107 static int g_num_of_iommus;
108
109 static DEFINE_SPINLOCK(async_umap_flush_lock);
110 static LIST_HEAD(unmaps_to_do);
111
112 static int timer_on;
113 static long list_size;
114
115 static void domain_remove_dev_info(struct dmar_domain *domain);
116
117 int dmar_disabled;
118 static int __initdata dmar_map_gfx = 1;
119 static int dmar_forcedac;
120 static int intel_iommu_strict;
121
122 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
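/* Devices whose dev.archdata.iommu is set to this marker bypass DMA remapping. */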
123 static DEFINE_SPINLOCK(device_domain_lock);
124 static LIST_HEAD(device_domain_list);
125
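/*
 * Parse the intel_iommu= boot parameter. Options are comma separated,
 * e.g. "intel_iommu=off" or "intel_iommu=igfx_off,strict".
 */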
126 static int __init intel_iommu_setup(char *str)
127 {
128 if (!str)
129 return -EINVAL;
130 while (*str) {
131 if (!strncmp(str, "off", 3)) {
132 dmar_disabled = 1;
133 printk(KERN_INFO"Intel-IOMMU: disabled\n");
134 } else if (!strncmp(str, "igfx_off", 8)) {
135 dmar_map_gfx = 0;
136 printk(KERN_INFO
137 "Intel-IOMMU: disable GFX device mapping\n");
138 } else if (!strncmp(str, "forcedac", 8)) {
139 printk(KERN_INFO
140 "Intel-IOMMU: Forcing DAC for PCI devices\n");
141 dmar_forcedac = 1;
142 } else if (!strncmp(str, "strict", 6)) {
143 printk(KERN_INFO
144 "Intel-IOMMU: disable batched IOTLB flush\n");
145 intel_iommu_strict = 1;
146 }
147
148 str += strcspn(str, ",");
149 while (*str == ',')
150 str++;
151 }
152 return 0;
153 }
154 __setup("intel_iommu=", intel_iommu_setup);
155
156 static struct kmem_cache *iommu_domain_cache;
157 static struct kmem_cache *iommu_devinfo_cache;
158 static struct kmem_cache *iommu_iova_cache;
159
160 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
161 {
162 unsigned int flags;
163 void *vaddr;
164
165 /* trying to avoid low memory issues */
166 flags = current->flags & PF_MEMALLOC;
167 current->flags |= PF_MEMALLOC;
168 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
169 current->flags &= (~PF_MEMALLOC | flags);
170 return vaddr;
171 }
172
173
174 static inline void *alloc_pgtable_page(void)
175 {
176 unsigned int flags;
177 void *vaddr;
178
179 /* trying to avoid low memory issues */
180 flags = current->flags & PF_MEMALLOC;
181 current->flags |= PF_MEMALLOC;
182 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
183 current->flags &= (~PF_MEMALLOC | flags);
184 return vaddr;
185 }
186
187 static inline void free_pgtable_page(void *vaddr)
188 {
189 free_page((unsigned long)vaddr);
190 }
191
192 static inline void *alloc_domain_mem(void)
193 {
194 return iommu_kmem_cache_alloc(iommu_domain_cache);
195 }
196
197 static void free_domain_mem(void *vaddr)
198 {
199 kmem_cache_free(iommu_domain_cache, vaddr);
200 }
201
202 static inline void * alloc_devinfo_mem(void)
203 {
204 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
205 }
206
207 static inline void free_devinfo_mem(void *vaddr)
208 {
209 kmem_cache_free(iommu_devinfo_cache, vaddr);
210 }
211
212 struct iova *alloc_iova_mem(void)
213 {
214 return iommu_kmem_cache_alloc(iommu_iova_cache);
215 }
216
217 void free_iova_mem(struct iova *iova)
218 {
219 kmem_cache_free(iommu_iova_cache, iova);
220 }
221
222 /* Gets context entry for a given bus and devfn */
223 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
224 u8 bus, u8 devfn)
225 {
226 struct root_entry *root;
227 struct context_entry *context;
228 unsigned long phy_addr;
229 unsigned long flags;
230
231 spin_lock_irqsave(&iommu->lock, flags);
232 root = &iommu->root_entry[bus];
233 context = get_context_addr_from_root(root);
234 if (!context) {
235 context = (struct context_entry *)alloc_pgtable_page();
236 if (!context) {
237 spin_unlock_irqrestore(&iommu->lock, flags);
238 return NULL;
239 }
240 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
241 phy_addr = virt_to_phys((void *)context);
242 set_root_value(root, phy_addr);
243 set_root_present(root);
244 __iommu_flush_cache(iommu, root, sizeof(*root));
245 }
246 spin_unlock_irqrestore(&iommu->lock, flags);
247 return &context[devfn];
248 }
249
250 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
251 {
252 struct root_entry *root;
253 struct context_entry *context;
254 int ret;
255 unsigned long flags;
256
257 spin_lock_irqsave(&iommu->lock, flags);
258 root = &iommu->root_entry[bus];
259 context = get_context_addr_from_root(root);
260 if (!context) {
261 ret = 0;
262 goto out;
263 }
264 ret = context_present(context[devfn]);
265 out:
266 spin_unlock_irqrestore(&iommu->lock, flags);
267 return ret;
268 }
269
270 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
271 {
272 struct root_entry *root;
273 struct context_entry *context;
274 unsigned long flags;
275
276 spin_lock_irqsave(&iommu->lock, flags);
277 root = &iommu->root_entry[bus];
278 context = get_context_addr_from_root(root);
279 if (context) {
280 context_clear_entry(context[devfn]);
281 __iommu_flush_cache(iommu, &context[devfn],
282 sizeof(*context));
283 }
284 spin_unlock_irqrestore(&iommu->lock, flags);
285 }
286
287 static void free_context_table(struct intel_iommu *iommu)
288 {
289 struct root_entry *root;
290 int i;
291 unsigned long flags;
292 struct context_entry *context;
293
294 spin_lock_irqsave(&iommu->lock, flags);
295 if (!iommu->root_entry) {
296 goto out;
297 }
298 for (i = 0; i < ROOT_ENTRY_NR; i++) {
299 root = &iommu->root_entry[i];
300 context = get_context_addr_from_root(root);
301 if (context)
302 free_pgtable_page(context);
303 }
304 free_pgtable_page(iommu->root_entry);
305 iommu->root_entry = NULL;
306 out:
307 spin_unlock_irqrestore(&iommu->lock, flags);
308 }
309
310 /* page table handling */
311 #define LEVEL_STRIDE (9)
312 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
313
314 static inline int agaw_to_level(int agaw)
315 {
316 return agaw + 2;
317 }
318
319 static inline int agaw_to_width(int agaw)
320 {
321 return 30 + agaw * LEVEL_STRIDE;
322
323 }
324
325 static inline int width_to_agaw(int width)
326 {
327 return (width - 30) / LEVEL_STRIDE;
328 }
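/* Example: agaw 2 corresponds to a 48-bit address width and a 4-level page table. */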
329
330 static inline unsigned int level_to_offset_bits(int level)
331 {
332 return (12 + (level - 1) * LEVEL_STRIDE);
333 }
334
335 static inline int address_level_offset(u64 addr, int level)
336 {
337 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
338 }
339
340 static inline u64 level_mask(int level)
341 {
342 return ((u64)-1 << level_to_offset_bits(level));
343 }
344
345 static inline u64 level_size(int level)
346 {
347 return ((u64)1 << level_to_offset_bits(level));
348 }
349
350 static inline u64 align_to_level(u64 addr, int level)
351 {
352 return ((addr + level_size(level) - 1) & level_mask(level));
353 }
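/*
 * Example: level 1 PTEs index address bits 12-20, level 2 bits 21-29, and so
 * on; level_size(2) is therefore 2MB and align_to_level(addr, 2) rounds addr
 * up to a 2MB boundary.
 */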
354
355 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
356 {
357 int addr_width = agaw_to_width(domain->agaw);
358 struct dma_pte *parent, *pte = NULL;
359 int level = agaw_to_level(domain->agaw);
360 int offset;
361 unsigned long flags;
362
363 BUG_ON(!domain->pgd);
364
365 addr &= (((u64)1) << addr_width) - 1;
366 parent = domain->pgd;
367
368 spin_lock_irqsave(&domain->mapping_lock, flags);
369 while (level > 0) {
370 void *tmp_page;
371
372 offset = address_level_offset(addr, level);
373 pte = &parent[offset];
374 if (level == 1)
375 break;
376
377 if (!dma_pte_present(*pte)) {
378 tmp_page = alloc_pgtable_page();
379
380 if (!tmp_page) {
381 spin_unlock_irqrestore(&domain->mapping_lock,
382 flags);
383 return NULL;
384 }
385 __iommu_flush_cache(domain->iommu, tmp_page,
386 PAGE_SIZE);
387 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
388 /*
389 * higher-level tables always set r/w; the last-level page
390 * table controls read/write
391 */
392 dma_set_pte_readable(*pte);
393 dma_set_pte_writable(*pte);
394 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
395 }
396 parent = phys_to_virt(dma_pte_addr(*pte));
397 level--;
398 }
399
400 spin_unlock_irqrestore(&domain->mapping_lock, flags);
401 return pte;
402 }
403
404 /* return address's pte at specific level */
405 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
406 int level)
407 {
408 struct dma_pte *parent, *pte = NULL;
409 int total = agaw_to_level(domain->agaw);
410 int offset;
411
412 parent = domain->pgd;
413 while (level <= total) {
414 offset = address_level_offset(addr, total);
415 pte = &parent[offset];
416 if (level == total)
417 return pte;
418
419 if (!dma_pte_present(*pte))
420 break;
421 parent = phys_to_virt(dma_pte_addr(*pte));
422 total--;
423 }
424 return NULL;
425 }
426
427 /* clear one page's page table */
428 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
429 {
430 struct dma_pte *pte = NULL;
431
432 /* get last level pte */
433 pte = dma_addr_level_pte(domain, addr, 1);
434
435 if (pte) {
436 dma_clear_pte(*pte);
437 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
438 }
439 }
440
441 /* clear last level pte, should be followed by a tlb flush */
442 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
443 {
444 int addr_width = agaw_to_width(domain->agaw);
445
446 start &= (((u64)1) << addr_width) - 1;
447 end &= (((u64)1) << addr_width) - 1;
448 /* in case it's partial page */
449 start = PAGE_ALIGN(start);
450 end &= PAGE_MASK;
451
452 /* we don't need lock here, nobody else touches the iova range */
453 while (start < end) {
454 dma_pte_clear_one(domain, start);
455 start += VTD_PAGE_SIZE;
456 }
457 }
458
459 /* free page table pages. last level pte should already be cleared */
460 static void dma_pte_free_pagetable(struct dmar_domain *domain,
461 u64 start, u64 end)
462 {
463 int addr_width = agaw_to_width(domain->agaw);
464 struct dma_pte *pte;
465 int total = agaw_to_level(domain->agaw);
466 int level;
467 u64 tmp;
468
469 start &= (((u64)1) << addr_width) - 1;
470 end &= (((u64)1) << addr_width) - 1;
471
472 /* we don't need lock here, nobody else touches the iova range */
473 level = 2;
474 while (level <= total) {
475 tmp = align_to_level(start, level);
476 if (tmp >= end || (tmp + level_size(level) > end))
477 return;
478
479 while (tmp < end) {
480 pte = dma_addr_level_pte(domain, tmp, level);
481 if (pte) {
482 free_pgtable_page(
483 phys_to_virt(dma_pte_addr(*pte)));
484 dma_clear_pte(*pte);
485 __iommu_flush_cache(domain->iommu,
486 pte, sizeof(*pte));
487 }
488 tmp += level_size(level);
489 }
490 level++;
491 }
492 /* free pgd */
493 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
494 free_pgtable_page(domain->pgd);
495 domain->pgd = NULL;
496 }
497 }
498
499 /* iommu handling */
500 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
501 {
502 struct root_entry *root;
503 unsigned long flags;
504
505 root = (struct root_entry *)alloc_pgtable_page();
506 if (!root)
507 return -ENOMEM;
508
509 __iommu_flush_cache(iommu, root, ROOT_SIZE);
510
511 spin_lock_irqsave(&iommu->lock, flags);
512 iommu->root_entry = root;
513 spin_unlock_irqrestore(&iommu->lock, flags);
514
515 return 0;
516 }
517
518 static void iommu_set_root_entry(struct intel_iommu *iommu)
519 {
520 void *addr;
521 u32 cmd, sts;
522 unsigned long flag;
523
524 addr = iommu->root_entry;
525
526 spin_lock_irqsave(&iommu->register_lock, flag);
527 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
528
529 cmd = iommu->gcmd | DMA_GCMD_SRTP;
530 writel(cmd, iommu->reg + DMAR_GCMD_REG);
531
532 /* Make sure hardware completes it */
533 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
534 readl, (sts & DMA_GSTS_RTPS), sts);
535
536 spin_unlock_irqrestore(&iommu->register_lock, flag);
537 }
538
539 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
540 {
541 u32 val;
542 unsigned long flag;
543
544 if (!cap_rwbf(iommu->cap))
545 return;
546 val = iommu->gcmd | DMA_GCMD_WBF;
547
548 spin_lock_irqsave(&iommu->register_lock, flag);
549 writel(val, iommu->reg + DMAR_GCMD_REG);
550
551 /* Make sure hardware completes it */
552 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
553 readl, (!(val & DMA_GSTS_WBFS)), val);
554
555 spin_unlock_irqrestore(&iommu->register_lock, flag);
556 }
557
558 /* return value determines whether we need a write buffer flush */
559 static int __iommu_flush_context(struct intel_iommu *iommu,
560 u16 did, u16 source_id, u8 function_mask, u64 type,
561 int non_present_entry_flush)
562 {
563 u64 val = 0;
564 unsigned long flag;
565
566 /*
567 * In the non-present entry flush case, if hardware doesn't cache
568 * non-present entries we do nothing; if it does cache them, we flush
569 * the entries of domain 0 (the domain id used to cache any
570 * non-present entries)
571 */
572 if (non_present_entry_flush) {
573 if (!cap_caching_mode(iommu->cap))
574 return 1;
575 else
576 did = 0;
577 }
578
579 switch (type) {
580 case DMA_CCMD_GLOBAL_INVL:
581 val = DMA_CCMD_GLOBAL_INVL;
582 break;
583 case DMA_CCMD_DOMAIN_INVL:
584 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
585 break;
586 case DMA_CCMD_DEVICE_INVL:
587 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
588 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
589 break;
590 default:
591 BUG();
592 }
593 val |= DMA_CCMD_ICC;
594
595 spin_lock_irqsave(&iommu->register_lock, flag);
596 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
597
598 /* Make sure hardware completes it */
599 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
600 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
601
602 spin_unlock_irqrestore(&iommu->register_lock, flag);
603
604 /* flush context entry will implicitly flush write buffer */
605 return 0;
606 }
607
608 /* return value determines whether we need a write buffer flush */
609 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
610 u64 addr, unsigned int size_order, u64 type,
611 int non_present_entry_flush)
612 {
613 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
614 u64 val = 0, val_iva = 0;
615 unsigned long flag;
616
617 /*
618 * In the non-present entry flush case, if hardware doesn't cache
619 * non-present entries we do nothing; if it does cache them, we flush
620 * the entries of domain 0 (the domain id used to cache any
621 * non-present entries)
622 */
623 if (non_present_entry_flush) {
624 if (!cap_caching_mode(iommu->cap))
625 return 1;
626 else
627 did = 0;
628 }
629
630 switch (type) {
631 case DMA_TLB_GLOBAL_FLUSH:
632 /* global flush doesn't need to set IVA_REG */
633 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
634 break;
635 case DMA_TLB_DSI_FLUSH:
636 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
637 break;
638 case DMA_TLB_PSI_FLUSH:
639 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
640 /* Note: always flush non-leaf currently */
641 val_iva = size_order | addr;
642 break;
643 default:
644 BUG();
645 }
646 /* Note: set drain read/write */
647 #if 0
648 /*
649 * This is probably only meant to be extra safe; it looks like we
650 * can ignore it without any impact.
651 */
652 if (cap_read_drain(iommu->cap))
653 val |= DMA_TLB_READ_DRAIN;
654 #endif
655 if (cap_write_drain(iommu->cap))
656 val |= DMA_TLB_WRITE_DRAIN;
657
658 spin_lock_irqsave(&iommu->register_lock, flag);
659 /* Note: Only uses first TLB reg currently */
660 if (val_iva)
661 dmar_writeq(iommu->reg + tlb_offset, val_iva);
662 dmar_writeq(iommu->reg + tlb_offset + 8, val);
663
664 /* Make sure hardware completes it */
665 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
666 dmar_readq, (!(val & DMA_TLB_IVT)), val);
667
668 spin_unlock_irqrestore(&iommu->register_lock, flag);
669
670 /* check IOTLB invalidation granularity */
671 if (DMA_TLB_IAIG(val) == 0)
672 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
673 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
674 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
675 (unsigned long long)DMA_TLB_IIRG(type),
676 (unsigned long long)DMA_TLB_IAIG(val));
677 /* flush iotlb entry will implicitly flush write buffer */
678 return 0;
679 }
680
681 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
682 u64 addr, unsigned int pages, int non_present_entry_flush)
683 {
684 unsigned int mask;
685
686 BUG_ON(addr & (~VTD_PAGE_MASK));
687 BUG_ON(pages == 0);
688
689 /* Fallback to domain selective flush if no PSI support */
690 if (!cap_pgsel_inv(iommu->cap))
691 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
692 DMA_TLB_DSI_FLUSH,
693 non_present_entry_flush);
694
695 /*
696 * PSI requires the region size to be a power of two pages, with the base
697 * address naturally aligned to that size
698 */
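/* e.g. pages == 9 is rounded up to 16, so mask == 4 and a 16-page region is flushed */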
699 mask = ilog2(__roundup_pow_of_two(pages));
700 /* Fallback to domain selective flush if size is too big */
701 if (mask > cap_max_amask_val(iommu->cap))
702 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
703 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
704
705 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
706 DMA_TLB_PSI_FLUSH,
707 non_present_entry_flush);
708 }
709
710 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
711 {
712 u32 pmen;
713 unsigned long flags;
714
715 spin_lock_irqsave(&iommu->register_lock, flags);
716 pmen = readl(iommu->reg + DMAR_PMEN_REG);
717 pmen &= ~DMA_PMEN_EPM;
718 writel(pmen, iommu->reg + DMAR_PMEN_REG);
719
720 /* wait for the protected region status bit to clear */
721 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
722 readl, !(pmen & DMA_PMEN_PRS), pmen);
723
724 spin_unlock_irqrestore(&iommu->register_lock, flags);
725 }
726
727 static int iommu_enable_translation(struct intel_iommu *iommu)
728 {
729 u32 sts;
730 unsigned long flags;
731
732 spin_lock_irqsave(&iommu->register_lock, flags);
733 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
734
735 /* Make sure hardware completes it */
736 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
737 readl, (sts & DMA_GSTS_TES), sts);
738
739 iommu->gcmd |= DMA_GCMD_TE;
740 spin_unlock_irqrestore(&iommu->register_lock, flags);
741 return 0;
742 }
743
744 static int iommu_disable_translation(struct intel_iommu *iommu)
745 {
746 u32 sts;
747 unsigned long flag;
748
749 spin_lock_irqsave(&iommu->register_lock, flag);
750 iommu->gcmd &= ~DMA_GCMD_TE;
751 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
752
753 /* Make sure hardware completes it */
754 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
755 readl, (!(sts & DMA_GSTS_TES)), sts);
756
757 spin_unlock_irqrestore(&iommu->register_lock, flag);
758 return 0;
759 }
760
761 /* iommu interrupt handling. Most of it is MSI-like. */
762
763 static const char *fault_reason_strings[] =
764 {
765 "Software",
766 "Present bit in root entry is clear",
767 "Present bit in context entry is clear",
768 "Invalid context entry",
769 "Access beyond MGAW",
770 "PTE Write access is not set",
771 "PTE Read access is not set",
772 "Next page table ptr is invalid",
773 "Root table address invalid",
774 "Context table ptr is invalid",
775 "non-zero reserved fields in RTP",
776 "non-zero reserved fields in CTP",
777 "non-zero reserved fields in PTE",
778 };
779 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
780
781 const char *dmar_get_fault_reason(u8 fault_reason)
782 {
783 if (fault_reason > MAX_FAULT_REASON_IDX)
784 return "Unknown";
785 else
786 return fault_reason_strings[fault_reason];
787 }
788
789 void dmar_msi_unmask(unsigned int irq)
790 {
791 struct intel_iommu *iommu = get_irq_data(irq);
792 unsigned long flag;
793
794 /* unmask it */
795 spin_lock_irqsave(&iommu->register_lock, flag);
796 writel(0, iommu->reg + DMAR_FECTL_REG);
797 /* Read back a register to flush the posted write */
798 readl(iommu->reg + DMAR_FECTL_REG);
799 spin_unlock_irqrestore(&iommu->register_lock, flag);
800 }
801
802 void dmar_msi_mask(unsigned int irq)
803 {
804 unsigned long flag;
805 struct intel_iommu *iommu = get_irq_data(irq);
806
807 /* mask it */
808 spin_lock_irqsave(&iommu->register_lock, flag);
809 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
810 /* Read back a register to flush the posted write */
811 readl(iommu->reg + DMAR_FECTL_REG);
812 spin_unlock_irqrestore(&iommu->register_lock, flag);
813 }
814
815 void dmar_msi_write(int irq, struct msi_msg *msg)
816 {
817 struct intel_iommu *iommu = get_irq_data(irq);
818 unsigned long flag;
819
820 spin_lock_irqsave(&iommu->register_lock, flag);
821 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
822 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
823 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
824 spin_unlock_irqrestore(&iommu->register_lock, flag);
825 }
826
827 void dmar_msi_read(int irq, struct msi_msg *msg)
828 {
829 struct intel_iommu *iommu = get_irq_data(irq);
830 unsigned long flag;
831
832 spin_lock_irqsave(&iommu->register_lock, flag);
833 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
834 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
835 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
836 spin_unlock_irqrestore(&iommu->register_lock, flag);
837 }
838
839 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
840 u8 fault_reason, u16 source_id, unsigned long long addr)
841 {
842 const char *reason;
843
844 reason = dmar_get_fault_reason(fault_reason);
845
846 printk(KERN_ERR
847 "DMAR:[%s] Request device [%02x:%02x.%d] "
848 "fault addr %llx \n"
849 "DMAR:[fault reason %02d] %s\n",
850 (type ? "DMA Read" : "DMA Write"),
851 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
852 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
853 return 0;
854 }
855
856 #define PRIMARY_FAULT_REG_LEN (16)
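/*
 * Each fault recording register is 128 bits: the low 64 bits hold the faulting
 * page address, bytes 8-11 the source id, and bytes 12-15 the fault reason,
 * type and the F (fault) bit.
 */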
857 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
858 {
859 struct intel_iommu *iommu = dev_id;
860 int reg, fault_index;
861 u32 fault_status;
862 unsigned long flag;
863
864 spin_lock_irqsave(&iommu->register_lock, flag);
865 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
866
867 /* TBD: ignore advanced fault log currently */
868 if (!(fault_status & DMA_FSTS_PPF))
869 goto clear_overflow;
870
871 fault_index = dma_fsts_fault_record_index(fault_status);
872 reg = cap_fault_reg_offset(iommu->cap);
873 while (1) {
874 u8 fault_reason;
875 u16 source_id;
876 u64 guest_addr;
877 int type;
878 u32 data;
879
880 /* highest 32 bits */
881 data = readl(iommu->reg + reg +
882 fault_index * PRIMARY_FAULT_REG_LEN + 12);
883 if (!(data & DMA_FRCD_F))
884 break;
885
886 fault_reason = dma_frcd_fault_reason(data);
887 type = dma_frcd_type(data);
888
889 data = readl(iommu->reg + reg +
890 fault_index * PRIMARY_FAULT_REG_LEN + 8);
891 source_id = dma_frcd_source_id(data);
892
893 guest_addr = dmar_readq(iommu->reg + reg +
894 fault_index * PRIMARY_FAULT_REG_LEN);
895 guest_addr = dma_frcd_page_addr(guest_addr);
896 /* clear the fault */
897 writel(DMA_FRCD_F, iommu->reg + reg +
898 fault_index * PRIMARY_FAULT_REG_LEN + 12);
899
900 spin_unlock_irqrestore(&iommu->register_lock, flag);
901
902 iommu_page_fault_do_one(iommu, type, fault_reason,
903 source_id, guest_addr);
904
905 fault_index++;
906 if (fault_index > cap_num_fault_regs(iommu->cap))
907 fault_index = 0;
908 spin_lock_irqsave(&iommu->register_lock, flag);
909 }
910 clear_overflow:
911 /* clear primary fault overflow */
912 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
913 if (fault_status & DMA_FSTS_PFO)
914 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
915
916 spin_unlock_irqrestore(&iommu->register_lock, flag);
917 return IRQ_HANDLED;
918 }
919
920 int dmar_set_interrupt(struct intel_iommu *iommu)
921 {
922 int irq, ret;
923
924 irq = create_irq();
925 if (!irq) {
926 printk(KERN_ERR "IOMMU: no free vectors\n");
927 return -EINVAL;
928 }
929
930 set_irq_data(irq, iommu);
931 iommu->irq = irq;
932
933 ret = arch_setup_dmar_msi(irq);
934 if (ret) {
935 set_irq_data(irq, NULL);
936 iommu->irq = 0;
937 destroy_irq(irq);
938 return ret;
939 }
940
941 /* Process and clear any faults that are already pending */
942 iommu_page_fault(irq, iommu);
943
944 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
945 if (ret)
946 printk(KERN_ERR "IOMMU: can't request irq\n");
947 return ret;
948 }
949
950 static int iommu_init_domains(struct intel_iommu *iommu)
951 {
952 unsigned long ndomains;
953 unsigned long nlongs;
954
955 ndomains = cap_ndoms(iommu->cap);
956 pr_debug("Number of Domains supported <%ld>\n", ndomains);
957 nlongs = BITS_TO_LONGS(ndomains);
958
959 /* TBD: there might be 64K domains,
960 * consider other allocation schemes for future chips
961 */
962 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
963 if (!iommu->domain_ids) {
964 printk(KERN_ERR "Allocating domain id array failed\n");
965 return -ENOMEM;
966 }
967 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
968 GFP_KERNEL);
969 if (!iommu->domains) {
970 printk(KERN_ERR "Allocating domain array failed\n");
971 kfree(iommu->domain_ids);
972 return -ENOMEM;
973 }
974
975 spin_lock_init(&iommu->lock);
976
977 /*
978 * if Caching mode is set, then invalid translations are tagged
979 * with domain id 0. Hence we need to pre-allocate it.
980 */
981 if (cap_caching_mode(iommu->cap))
982 set_bit(0, iommu->domain_ids);
983 return 0;
984 }
985
986
987 static void domain_exit(struct dmar_domain *domain);
988
989 void free_dmar_iommu(struct intel_iommu *iommu)
990 {
991 struct dmar_domain *domain;
992 int i;
993
994 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
995 for (; i < cap_ndoms(iommu->cap); ) {
996 domain = iommu->domains[i];
997 clear_bit(i, iommu->domain_ids);
998 domain_exit(domain);
999 i = find_next_bit(iommu->domain_ids,
1000 cap_ndoms(iommu->cap), i+1);
1001 }
1002
1003 if (iommu->gcmd & DMA_GCMD_TE)
1004 iommu_disable_translation(iommu);
1005
1006 if (iommu->irq) {
1007 set_irq_data(iommu->irq, NULL);
1008 /* This will mask the irq */
1009 free_irq(iommu->irq, iommu);
1010 destroy_irq(iommu->irq);
1011 }
1012
1013 kfree(iommu->domains);
1014 kfree(iommu->domain_ids);
1015
1016 /* free context mapping */
1017 free_context_table(iommu);
1018 }
1019
1020 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1021 {
1022 unsigned long num;
1023 unsigned long ndomains;
1024 struct dmar_domain *domain;
1025 unsigned long flags;
1026
1027 domain = alloc_domain_mem();
1028 if (!domain)
1029 return NULL;
1030
1031 ndomains = cap_ndoms(iommu->cap);
1032
1033 spin_lock_irqsave(&iommu->lock, flags);
1034 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1035 if (num >= ndomains) {
1036 spin_unlock_irqrestore(&iommu->lock, flags);
1037 free_domain_mem(domain);
1038 printk(KERN_ERR "IOMMU: no free domain ids\n");
1039 return NULL;
1040 }
1041
1042 set_bit(num, iommu->domain_ids);
1043 domain->id = num;
1044 domain->iommu = iommu;
1045 iommu->domains[num] = domain;
1046 spin_unlock_irqrestore(&iommu->lock, flags);
1047
1048 return domain;
1049 }
1050
1051 static void iommu_free_domain(struct dmar_domain *domain)
1052 {
1053 unsigned long flags;
1054
1055 spin_lock_irqsave(&domain->iommu->lock, flags);
1056 clear_bit(domain->id, domain->iommu->domain_ids);
1057 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1058 }
1059
1060 static struct iova_domain reserved_iova_list;
1061 static struct lock_class_key reserved_alloc_key;
1062 static struct lock_class_key reserved_rbtree_key;
1063
1064 static void dmar_init_reserved_ranges(void)
1065 {
1066 struct pci_dev *pdev = NULL;
1067 struct iova *iova;
1068 int i;
1069 u64 addr, size;
1070
1071 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1072
1073 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1074 &reserved_alloc_key);
1075 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1076 &reserved_rbtree_key);
1077
1078 /* IOAPIC ranges shouldn't be accessed by DMA */
1079 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1080 IOVA_PFN(IOAPIC_RANGE_END));
1081 if (!iova)
1082 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1083
1084 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1085 for_each_pci_dev(pdev) {
1086 struct resource *r;
1087
1088 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1089 r = &pdev->resource[i];
1090 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1091 continue;
1092 addr = r->start;
1093 addr &= PAGE_MASK;
1094 size = r->end - addr;
1095 size = PAGE_ALIGN(size);
1096 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1097 IOVA_PFN(size + addr) - 1);
1098 if (!iova)
1099 printk(KERN_ERR "Reserve iova failed\n");
1100 }
1101 }
1102
1103 }
1104
1105 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1106 {
1107 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1108 }
1109
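/*
 * Round a guest address width up to the next width that whole page-table
 * levels can cover (a 12-bit page offset plus a multiple of 9 bits), capped
 * at 64. Example: gaw 36 becomes 39, gaw 48 stays 48.
 */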
1110 static inline int guestwidth_to_adjustwidth(int gaw)
1111 {
1112 int agaw;
1113 int r = (gaw - 12) % 9;
1114
1115 if (r == 0)
1116 agaw = gaw;
1117 else
1118 agaw = gaw + 9 - r;
1119 if (agaw > 64)
1120 agaw = 64;
1121 return agaw;
1122 }
1123
1124 static int domain_init(struct dmar_domain *domain, int guest_width)
1125 {
1126 struct intel_iommu *iommu;
1127 int adjust_width, agaw;
1128 unsigned long sagaw;
1129
1130 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1131 spin_lock_init(&domain->mapping_lock);
1132
1133 domain_reserve_special_ranges(domain);
1134
1135 /* calculate AGAW */
1136 iommu = domain->iommu;
1137 if (guest_width > cap_mgaw(iommu->cap))
1138 guest_width = cap_mgaw(iommu->cap);
1139 domain->gaw = guest_width;
1140 adjust_width = guestwidth_to_adjustwidth(guest_width);
1141 agaw = width_to_agaw(adjust_width);
1142 sagaw = cap_sagaw(iommu->cap);
1143 if (!test_bit(agaw, &sagaw)) {
1144 /* hardware doesn't support it, choose a bigger one */
1145 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1146 agaw = find_next_bit(&sagaw, 5, agaw);
1147 if (agaw >= 5)
1148 return -ENODEV;
1149 }
1150 domain->agaw = agaw;
1151 INIT_LIST_HEAD(&domain->devices);
1152
1153 /* always allocate the top pgd */
1154 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1155 if (!domain->pgd)
1156 return -ENOMEM;
1157 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1158 return 0;
1159 }
1160
1161 static void domain_exit(struct dmar_domain *domain)
1162 {
1163 u64 end;
1164
1165 /* Domain 0 is reserved, so don't process it */
1166 if (!domain)
1167 return;
1168
1169 domain_remove_dev_info(domain);
1170 /* destroy iovas */
1171 put_iova_domain(&domain->iovad);
1172 end = DOMAIN_MAX_ADDR(domain->gaw);
1173 end = end & PAGE_MASK;
1174
1175 /* clear ptes */
1176 dma_pte_clear_range(domain, 0, end);
1177
1178 /* free page tables */
1179 dma_pte_free_pagetable(domain, 0, end);
1180
1181 iommu_free_domain(domain);
1182 free_domain_mem(domain);
1183 }
1184
1185 static int domain_context_mapping_one(struct dmar_domain *domain,
1186 u8 bus, u8 devfn)
1187 {
1188 struct context_entry *context;
1189 struct intel_iommu *iommu = domain->iommu;
1190 unsigned long flags;
1191
1192 pr_debug("Set context mapping for %02x:%02x.%d\n",
1193 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1194 BUG_ON(!domain->pgd);
1195 context = device_to_context_entry(iommu, bus, devfn);
1196 if (!context)
1197 return -ENOMEM;
1198 spin_lock_irqsave(&iommu->lock, flags);
1199 if (context_present(*context)) {
1200 spin_unlock_irqrestore(&iommu->lock, flags);
1201 return 0;
1202 }
1203
1204 context_set_domain_id(*context, domain->id);
1205 context_set_address_width(*context, domain->agaw);
1206 context_set_address_root(*context, virt_to_phys(domain->pgd));
1207 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1208 context_set_fault_enable(*context);
1209 context_set_present(*context);
1210 __iommu_flush_cache(iommu, context, sizeof(*context));
1211
1212 /* it's a non-present to present mapping */
1213 if (iommu->flush.flush_context(iommu, domain->id,
1214 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1215 DMA_CCMD_DEVICE_INVL, 1))
1216 iommu_flush_write_buffer(iommu);
1217 else
1218 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1219
1220 spin_unlock_irqrestore(&iommu->lock, flags);
1221 return 0;
1222 }
1223
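/*
 * Install a context entry for the device itself and, when it sits behind a
 * PCIe-to-PCI bridge, for every bridge on the upstream path as well, since
 * DMA from devices behind such a bridge can appear with the bridge's
 * source id.
 */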
1224 static int
1225 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1226 {
1227 int ret;
1228 struct pci_dev *tmp, *parent;
1229
1230 ret = domain_context_mapping_one(domain, pdev->bus->number,
1231 pdev->devfn);
1232 if (ret)
1233 return ret;
1234
1235 /* dependent device mapping */
1236 tmp = pci_find_upstream_pcie_bridge(pdev);
1237 if (!tmp)
1238 return 0;
1239 /* Secondary interface's bus number and devfn 0 */
1240 parent = pdev->bus->self;
1241 while (parent != tmp) {
1242 ret = domain_context_mapping_one(domain, parent->bus->number,
1243 parent->devfn);
1244 if (ret)
1245 return ret;
1246 parent = parent->bus->self;
1247 }
1248 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1249 return domain_context_mapping_one(domain,
1250 tmp->subordinate->number, 0);
1251 else /* this is a legacy PCI bridge */
1252 return domain_context_mapping_one(domain,
1253 tmp->bus->number, tmp->devfn);
1254 }
1255
1256 static int domain_context_mapped(struct dmar_domain *domain,
1257 struct pci_dev *pdev)
1258 {
1259 int ret;
1260 struct pci_dev *tmp, *parent;
1261
1262 ret = device_context_mapped(domain->iommu,
1263 pdev->bus->number, pdev->devfn);
1264 if (!ret)
1265 return ret;
1266 /* dependent device mapping */
1267 tmp = pci_find_upstream_pcie_bridge(pdev);
1268 if (!tmp)
1269 return ret;
1270 /* Secondary interface's bus number and devfn 0 */
1271 parent = pdev->bus->self;
1272 while (parent != tmp) {
1273 ret = device_context_mapped(domain->iommu, parent->bus->number,
1274 parent->devfn);
1275 if (!ret)
1276 return ret;
1277 parent = parent->bus->self;
1278 }
1279 if (tmp->is_pcie)
1280 return device_context_mapped(domain->iommu,
1281 tmp->subordinate->number, 0);
1282 else
1283 return device_context_mapped(domain->iommu,
1284 tmp->bus->number, tmp->devfn);
1285 }
1286
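/*
 * Map the host physical range [hpa, hpa + size) at IOVA iova, one
 * VTD_PAGE_SIZE page at a time; prot must include DMA_PTE_READ and/or
 * DMA_PTE_WRITE.
 */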
1287 static int
1288 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1289 u64 hpa, size_t size, int prot)
1290 {
1291 u64 start_pfn, end_pfn;
1292 struct dma_pte *pte;
1293 int index;
1294 int addr_width = agaw_to_width(domain->agaw);
1295
1296 hpa &= (((u64)1) << addr_width) - 1;
1297
1298 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1299 return -EINVAL;
1300 iova &= PAGE_MASK;
1301 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1302 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1303 index = 0;
1304 while (start_pfn < end_pfn) {
1305 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1306 if (!pte)
1307 return -ENOMEM;
1308 /* We don't need lock here, nobody else
1309 * touches the iova range
1310 */
1311 BUG_ON(dma_pte_addr(*pte));
1312 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1313 dma_set_pte_prot(*pte, prot);
1314 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1315 start_pfn++;
1316 index++;
1317 }
1318 return 0;
1319 }
1320
1321 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1322 {
1323 clear_context_table(domain->iommu, bus, devfn);
1324 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1325 DMA_CCMD_GLOBAL_INVL, 0);
1326 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1327 DMA_TLB_GLOBAL_FLUSH, 0);
1328 }
1329
1330 static void domain_remove_dev_info(struct dmar_domain *domain)
1331 {
1332 struct device_domain_info *info;
1333 unsigned long flags;
1334
1335 spin_lock_irqsave(&device_domain_lock, flags);
1336 while (!list_empty(&domain->devices)) {
1337 info = list_entry(domain->devices.next,
1338 struct device_domain_info, link);
1339 list_del(&info->link);
1340 list_del(&info->global);
1341 if (info->dev)
1342 info->dev->dev.archdata.iommu = NULL;
1343 spin_unlock_irqrestore(&device_domain_lock, flags);
1344
1345 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1346 free_devinfo_mem(info);
1347
1348 spin_lock_irqsave(&device_domain_lock, flags);
1349 }
1350 spin_unlock_irqrestore(&device_domain_lock, flags);
1351 }
1352
1353 /*
1354 * find_domain
1355 * Note: struct pci_dev->dev.archdata.iommu stores the device_domain_info
1356 */
1357 static struct dmar_domain *
1358 find_domain(struct pci_dev *pdev)
1359 {
1360 struct device_domain_info *info;
1361
1362 /* No lock here, assumes no domain exit in normal case */
1363 info = pdev->dev.archdata.iommu;
1364 if (info)
1365 return info->domain;
1366 return NULL;
1367 }
1368
1369 /* domain is initialized */
1370 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1371 {
1372 struct dmar_domain *domain, *found = NULL;
1373 struct intel_iommu *iommu;
1374 struct dmar_drhd_unit *drhd;
1375 struct device_domain_info *info, *tmp;
1376 struct pci_dev *dev_tmp;
1377 unsigned long flags;
1378 int bus = 0, devfn = 0;
1379
1380 domain = find_domain(pdev);
1381 if (domain)
1382 return domain;
1383
1384 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1385 if (dev_tmp) {
1386 if (dev_tmp->is_pcie) {
1387 bus = dev_tmp->subordinate->number;
1388 devfn = 0;
1389 } else {
1390 bus = dev_tmp->bus->number;
1391 devfn = dev_tmp->devfn;
1392 }
1393 spin_lock_irqsave(&device_domain_lock, flags);
1394 list_for_each_entry(info, &device_domain_list, global) {
1395 if (info->bus == bus && info->devfn == devfn) {
1396 found = info->domain;
1397 break;
1398 }
1399 }
1400 spin_unlock_irqrestore(&device_domain_lock, flags);
1401 /* the pcie-pci bridge already has a domain, use it */
1402 if (found) {
1403 domain = found;
1404 goto found_domain;
1405 }
1406 }
1407
1408 /* Allocate new domain for the device */
1409 drhd = dmar_find_matched_drhd_unit(pdev);
1410 if (!drhd) {
1411 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1412 pci_name(pdev));
1413 return NULL;
1414 }
1415 iommu = drhd->iommu;
1416
1417 domain = iommu_alloc_domain(iommu);
1418 if (!domain)
1419 goto error;
1420
1421 if (domain_init(domain, gaw)) {
1422 domain_exit(domain);
1423 goto error;
1424 }
1425
1426 /* register pcie-to-pci device */
1427 if (dev_tmp) {
1428 info = alloc_devinfo_mem();
1429 if (!info) {
1430 domain_exit(domain);
1431 goto error;
1432 }
1433 info->bus = bus;
1434 info->devfn = devfn;
1435 info->dev = NULL;
1436 info->domain = domain;
1437 /* This domain is shared by devices under p2p bridge */
1438 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1439
1440 /* the pcie-to-pci bridge already has a domain, use it */
1441 found = NULL;
1442 spin_lock_irqsave(&device_domain_lock, flags);
1443 list_for_each_entry(tmp, &device_domain_list, global) {
1444 if (tmp->bus == bus && tmp->devfn == devfn) {
1445 found = tmp->domain;
1446 break;
1447 }
1448 }
1449 if (found) {
1450 free_devinfo_mem(info);
1451 domain_exit(domain);
1452 domain = found;
1453 } else {
1454 list_add(&info->link, &domain->devices);
1455 list_add(&info->global, &device_domain_list);
1456 }
1457 spin_unlock_irqrestore(&device_domain_lock, flags);
1458 }
1459
1460 found_domain:
1461 info = alloc_devinfo_mem();
1462 if (!info)
1463 goto error;
1464 info->bus = pdev->bus->number;
1465 info->devfn = pdev->devfn;
1466 info->dev = pdev;
1467 info->domain = domain;
1468 spin_lock_irqsave(&device_domain_lock, flags);
1469 /* somebody is fast */
1470 found = find_domain(pdev);
1471 if (found != NULL) {
1472 spin_unlock_irqrestore(&device_domain_lock, flags);
1473 if (found != domain) {
1474 domain_exit(domain);
1475 domain = found;
1476 }
1477 free_devinfo_mem(info);
1478 return domain;
1479 }
1480 list_add(&info->link, &domain->devices);
1481 list_add(&info->global, &device_domain_list);
1482 pdev->dev.archdata.iommu = info;
1483 spin_unlock_irqrestore(&device_domain_lock, flags);
1484 return domain;
1485 error:
1486 /* recheck it here, maybe others set it */
1487 return find_domain(pdev);
1488 }
1489
1490 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1491 unsigned long long start,
1492 unsigned long long end)
1493 {
1494 struct dmar_domain *domain;
1495 unsigned long size;
1496 unsigned long long base;
1497 int ret;
1498
1499 printk(KERN_INFO
1500 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1501 pci_name(pdev), start, end);
1502 /* page table init */
1503 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1504 if (!domain)
1505 return -ENOMEM;
1506
1507 /* The address might not be aligned */
1508 base = start & PAGE_MASK;
1509 size = end - base;
1510 size = PAGE_ALIGN(size);
1511 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1512 IOVA_PFN(base + size) - 1)) {
1513 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1514 ret = -ENOMEM;
1515 goto error;
1516 }
1517
1518 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1519 size, base, pci_name(pdev));
1520 /*
1521 * The RMRR range might overlap the physical memory range,
1522 * clear it first
1523 */
1524 dma_pte_clear_range(domain, base, base + size);
1525
1526 ret = domain_page_mapping(domain, base, base, size,
1527 DMA_PTE_READ|DMA_PTE_WRITE);
1528 if (ret)
1529 goto error;
1530
1531 /* context entry init */
1532 ret = domain_context_mapping(domain, pdev);
1533 if (!ret)
1534 return 0;
1535 error:
1536 domain_exit(domain);
1537 return ret;
1538
1539 }
1540
1541 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1542 struct pci_dev *pdev)
1543 {
1544 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1545 return 0;
1546 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1547 rmrr->end_address + 1);
1548 }
1549
1550 #ifdef CONFIG_DMAR_GFX_WA
1551 struct iommu_prepare_data {
1552 struct pci_dev *pdev;
1553 int ret;
1554 };
1555
1556 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1557 unsigned long end_pfn, void *datax)
1558 {
1559 struct iommu_prepare_data *data;
1560
1561 data = (struct iommu_prepare_data *)datax;
1562
1563 data->ret = iommu_prepare_identity_map(data->pdev,
1564 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1565 return data->ret;
1566
1567 }
1568
1569 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1570 {
1571 int nid;
1572 struct iommu_prepare_data data;
1573
1574 data.pdev = pdev;
1575 data.ret = 0;
1576
1577 for_each_online_node(nid) {
1578 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1579 if (data.ret)
1580 return data.ret;
1581 }
1582 return data.ret;
1583 }
1584
1585 static void __init iommu_prepare_gfx_mapping(void)
1586 {
1587 struct pci_dev *pdev = NULL;
1588 int ret;
1589
1590 for_each_pci_dev(pdev) {
1591 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1592 !IS_GFX_DEVICE(pdev))
1593 continue;
1594 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1595 pci_name(pdev));
1596 ret = iommu_prepare_with_active_regions(pdev);
1597 if (ret)
1598 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1599 }
1600 }
1601 #endif
1602
1603 #ifdef CONFIG_DMAR_FLOPPY_WA
1604 static inline void iommu_prepare_isa(void)
1605 {
1606 struct pci_dev *pdev;
1607 int ret;
1608
1609 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1610 if (!pdev)
1611 return;
1612
1613 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1614 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1615
1616 if (ret)
1617 printk("IOMMU: Failed to create 0-64M identity map, "
1618 "floppy might not work\n");
1619
1620 }
1621 #else
1622 static inline void iommu_prepare_isa(void)
1623 {
1624 return;
1625 }
1626 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1627
1628 static int __init init_dmars(void)
1629 {
1630 struct dmar_drhd_unit *drhd;
1631 struct dmar_rmrr_unit *rmrr;
1632 struct pci_dev *pdev;
1633 struct intel_iommu *iommu;
1634 int i, ret, unit = 0;
1635
1636 /*
1637 * for each drhd
1638 * allocate root
1639 * initialize and program root entry to not present
1640 * endfor
1641 */
1642 for_each_drhd_unit(drhd) {
1643 g_num_of_iommus++;
1644 /*
1645 * lock not needed as this is only incremented in the
1646 * single-threaded kernel __init code path; all other accesses
1647 * are read-only
1648 */
1649 }
1650
1651 deferred_flush = kzalloc(g_num_of_iommus *
1652 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1653 if (!deferred_flush) {
1654 ret = -ENOMEM;
1655 goto error;
1656 }
1657
1658 for_each_drhd_unit(drhd) {
1659 if (drhd->ignored)
1660 continue;
1661
1662 iommu = drhd->iommu;
1663
1664 ret = iommu_init_domains(iommu);
1665 if (ret)
1666 goto error;
1667
1668 /*
1669 * TBD:
1670 * we could share the same root & context tables
1671 * among all IOMMUs. Need to split it later.
1672 */
1673 ret = iommu_alloc_root_entry(iommu);
1674 if (ret) {
1675 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1676 goto error;
1677 }
1678 }
1679
1680 for_each_drhd_unit(drhd) {
1681 if (drhd->ignored)
1682 continue;
1683
1684 iommu = drhd->iommu;
1685 if (dmar_enable_qi(iommu)) {
1686 /*
1687 * Queued Invalidate not enabled, use Register Based
1688 * Invalidate
1689 */
1690 iommu->flush.flush_context = __iommu_flush_context;
1691 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1692 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1693 "invalidation\n",
1694 (unsigned long long)drhd->reg_base_addr);
1695 } else {
1696 iommu->flush.flush_context = qi_flush_context;
1697 iommu->flush.flush_iotlb = qi_flush_iotlb;
1698 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1699 "invalidation\n",
1700 (unsigned long long)drhd->reg_base_addr);
1701 }
1702 }
1703
1704 /*
1705 * For each rmrr
1706 * for each dev attached to rmrr
1707 * do
1708 * locate drhd for dev, alloc domain for dev
1709 * allocate free domain
1710 * allocate page table entries for rmrr
1711 * if context not allocated for bus
1712 * allocate and init context
1713 * set present in root table for this bus
1714 * init context with domain, translation etc
1715 * endfor
1716 * endfor
1717 */
1718 for_each_rmrr_units(rmrr) {
1719 for (i = 0; i < rmrr->devices_cnt; i++) {
1720 pdev = rmrr->devices[i];
1721 /* some BIOSes list non-existent devices in the DMAR table */
1722 if (!pdev)
1723 continue;
1724 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1725 if (ret)
1726 printk(KERN_ERR
1727 "IOMMU: mapping reserved region failed\n");
1728 }
1729 }
1730
1731 iommu_prepare_gfx_mapping();
1732
1733 iommu_prepare_isa();
1734
1735 /*
1736 * for each drhd
1737 * enable fault log
1738 * global invalidate context cache
1739 * global invalidate iotlb
1740 * enable translation
1741 */
1742 for_each_drhd_unit(drhd) {
1743 if (drhd->ignored)
1744 continue;
1745 iommu = drhd->iommu;
1746 sprintf (iommu->name, "dmar%d", unit++);
1747
1748 iommu_flush_write_buffer(iommu);
1749
1750 ret = dmar_set_interrupt(iommu);
1751 if (ret)
1752 goto error;
1753
1754 iommu_set_root_entry(iommu);
1755
1756 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1757 0);
1758 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1759 0);
1760 iommu_disable_protect_mem_regions(iommu);
1761
1762 ret = iommu_enable_translation(iommu);
1763 if (ret)
1764 goto error;
1765 }
1766
1767 return 0;
1768 error:
1769 for_each_drhd_unit(drhd) {
1770 if (drhd->ignored)
1771 continue;
1772 iommu = drhd->iommu;
1773 free_iommu(iommu);
1774 }
1775 return ret;
1776 }
1777
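/*
 * Number of bytes of whole pages spanned by [host_addr, host_addr + size),
 * e.g. a 0x1000-byte buffer starting at offset 0x100 into a page spans two
 * pages, so aligned_size() returns 0x2000.
 */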
1778 static inline u64 aligned_size(u64 host_addr, size_t size)
1779 {
1780 u64 addr;
1781 addr = (host_addr & (~PAGE_MASK)) + size;
1782 return PAGE_ALIGN(addr);
1783 }
1784
1785 struct iova *
1786 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1787 {
1788 struct iova *piova;
1789
1790 /* Make sure it's in range */
1791 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1792 if (!size || (IOVA_START_ADDR + size > end))
1793 return NULL;
1794
1795 piova = alloc_iova(&domain->iovad,
1796 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1797 return piova;
1798 }
1799
1800 static struct iova *
1801 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1802 size_t size, u64 dma_mask)
1803 {
1804 struct pci_dev *pdev = to_pci_dev(dev);
1805 struct iova *iova = NULL;
1806
1807 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1808 iova = iommu_alloc_iova(domain, size, dma_mask);
1809 else {
1810 /*
1811 * First try to allocate an io virtual address in
1812 * DMA_32BIT_MASK and if that fails then try allocating
1813 * from higher range
1814 */
1815 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1816 if (!iova)
1817 iova = iommu_alloc_iova(domain, size, dma_mask);
1818 }
1819
1820 if (!iova) {
1821 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1822 return NULL;
1823 }
1824
1825 return iova;
1826 }
1827
1828 static struct dmar_domain *
1829 get_valid_domain_for_dev(struct pci_dev *pdev)
1830 {
1831 struct dmar_domain *domain;
1832 int ret;
1833
1834 domain = get_domain_for_dev(pdev,
1835 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1836 if (!domain) {
1837 printk(KERN_ERR
1838 "Allocating domain for %s failed", pci_name(pdev));
1839 return NULL;
1840 }
1841
1842 /* make sure context mapping is ok */
1843 if (unlikely(!domain_context_mapped(domain, pdev))) {
1844 ret = domain_context_mapping(domain, pdev);
1845 if (ret) {
1846 printk(KERN_ERR
1847 "Domain context map for %s failed",
1848 pci_name(pdev));
1849 return NULL;
1850 }
1851 }
1852
1853 return domain;
1854 }
1855
1856 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1857 size_t size, int dir, u64 dma_mask)
1858 {
1859 struct pci_dev *pdev = to_pci_dev(hwdev);
1860 struct dmar_domain *domain;
1861 phys_addr_t start_paddr;
1862 struct iova *iova;
1863 int prot = 0;
1864 int ret;
1865
1866 BUG_ON(dir == DMA_NONE);
1867 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1868 return paddr;
1869
1870 domain = get_valid_domain_for_dev(pdev);
1871 if (!domain)
1872 return 0;
1873
1874 size = aligned_size((u64)paddr, size);
1875
1876 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1877 if (!iova)
1878 goto error;
1879
1880 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
1881
1882 /*
1883 * Check if DMAR supports zero-length reads on write only
1884 * mappings..
1885 */
1886 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
1887 !cap_zlr(domain->iommu->cap))
1888 prot |= DMA_PTE_READ;
1889 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1890 prot |= DMA_PTE_WRITE;
1891 /*
1892 * paddr .. paddr + size may cover partial pages, so we should map
1893 * whole pages. Note: if two parts of one page are mapped separately,
1894 * we might have two guest addresses mapping to the same host paddr,
1895 * but this is not a big problem
1896 */
1897 ret = domain_page_mapping(domain, start_paddr,
1898 ((u64)paddr) & PAGE_MASK, size, prot);
1899 if (ret)
1900 goto error;
1901
1902 /* it's a non-present to present mapping */
1903 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1904 start_paddr, size >> VTD_PAGE_SHIFT, 1);
1905 if (ret)
1906 iommu_flush_write_buffer(domain->iommu);
1907
1908 return start_paddr + ((u64)paddr & (~PAGE_MASK));
1909
1910 error:
1911 if (iova)
1912 __free_iova(&domain->iovad, iova);
1913 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1914 pci_name(pdev), size, (unsigned long long)paddr, dir);
1915 return 0;
1916 }
1917
1918 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1919 size_t size, int dir)
1920 {
1921 return __intel_map_single(hwdev, paddr, size, dir,
1922 to_pci_dev(hwdev)->dma_mask);
1923 }
1924
1925 static void flush_unmaps(void)
1926 {
1927 int i, j;
1928
1929 timer_on = 0;
1930
1931 /* just flush them all */
1932 for (i = 0; i < g_num_of_iommus; i++) {
1933 if (deferred_flush[i].next) {
1934 struct intel_iommu *iommu =
1935 deferred_flush[i].domain[0]->iommu;
1936
1937 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1938 DMA_TLB_GLOBAL_FLUSH, 0);
1939 for (j = 0; j < deferred_flush[i].next; j++) {
1940 __free_iova(&deferred_flush[i].domain[j]->iovad,
1941 deferred_flush[i].iova[j]);
1942 }
1943 deferred_flush[i].next = 0;
1944 }
1945 }
1946
1947 list_size = 0;
1948 }
1949
1950 static void flush_unmaps_timeout(unsigned long data)
1951 {
1952 unsigned long flags;
1953
1954 spin_lock_irqsave(&async_umap_flush_lock, flags);
1955 flush_unmaps();
1956 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1957 }
1958
1959 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1960 {
1961 unsigned long flags;
1962 int next, iommu_id;
1963
1964 spin_lock_irqsave(&async_umap_flush_lock, flags);
1965 if (list_size == HIGH_WATER_MARK)
1966 flush_unmaps();
1967
1968 iommu_id = dom->iommu->seq_id;
1969
1970 next = deferred_flush[iommu_id].next;
1971 deferred_flush[iommu_id].domain[next] = dom;
1972 deferred_flush[iommu_id].iova[next] = iova;
1973 deferred_flush[iommu_id].next++;
1974
1975 if (!timer_on) {
1976 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1977 timer_on = 1;
1978 }
1979 list_size++;
1980 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1981 }
1982
1983 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
1984 int dir)
1985 {
1986 struct pci_dev *pdev = to_pci_dev(dev);
1987 struct dmar_domain *domain;
1988 unsigned long start_addr;
1989 struct iova *iova;
1990
1991 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1992 return;
1993 domain = find_domain(pdev);
1994 BUG_ON(!domain);
1995
1996 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1997 if (!iova)
1998 return;
1999
2000 start_addr = iova->pfn_lo << PAGE_SHIFT;
2001 size = aligned_size((u64)dev_addr, size);
2002
2003 pr_debug("Device %s unmapping: %lx@%llx\n",
2004 pci_name(pdev), size, (unsigned long long)start_addr);
2005
2006 /* clear the whole page */
2007 dma_pte_clear_range(domain, start_addr, start_addr + size);
2008 /* free page tables */
2009 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2010 if (intel_iommu_strict) {
2011 if (iommu_flush_iotlb_psi(domain->iommu,
2012 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2013 iommu_flush_write_buffer(domain->iommu);
2014 /* free iova */
2015 __free_iova(&domain->iovad, iova);
2016 } else {
2017 add_unmap(domain, iova);
2018 /*
2019 * queue up the release of the unmap to save the 1/6th of the
2020 * cpu used up by the iotlb flush operation...
2021 */
2022 }
2023 }
2024
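/*
 * Allocate a zeroed, page-aligned buffer and map it DMA_BIDIRECTIONAL
 * against the device's coherent_dma_mask.
 */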
2025 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2026 dma_addr_t *dma_handle, gfp_t flags)
2027 {
2028 void *vaddr;
2029 int order;
2030
2031 size = PAGE_ALIGN(size);
2032 order = get_order(size);
2033 flags &= ~(GFP_DMA | GFP_DMA32);
2034
2035 vaddr = (void *)__get_free_pages(flags, order);
2036 if (!vaddr)
2037 return NULL;
2038 memset(vaddr, 0, size);
2039
2040 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2041 DMA_BIDIRECTIONAL,
2042 hwdev->coherent_dma_mask);
2043 if (*dma_handle)
2044 return vaddr;
2045 free_pages((unsigned long)vaddr, order);
2046 return NULL;
2047 }
2048
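/* Unmap and free a buffer obtained from intel_alloc_coherent(). */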
2049 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2050 dma_addr_t dma_handle)
2051 {
2052 int order;
2053
2054 size = PAGE_ALIGN(size);
2055 order = get_order(size);
2056
2057 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2058 free_pages((unsigned long)vaddr, order);
2059 }
2060
2061 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2062
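/*
 * Undo intel_map_sg(): the list was mapped into one contiguous IOVA range,
 * so recompute its total aligned size, clear and free the page tables for
 * that range, flush the IOTLB and release the IOVA.
 */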
2063 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2064 int nelems, int dir)
2065 {
2066 int i;
2067 struct pci_dev *pdev = to_pci_dev(hwdev);
2068 struct dmar_domain *domain;
2069 unsigned long start_addr;
2070 struct iova *iova;
2071 size_t size = 0;
2072 void *addr;
2073 struct scatterlist *sg;
2074
2075 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2076 return;
2077
2078 domain = find_domain(pdev);
2079
2080 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2081 if (!iova)
2082 return;
2083 for_each_sg(sglist, sg, nelems, i) {
2084 addr = SG_ENT_VIRT_ADDRESS(sg);
2085 size += aligned_size((u64)addr, sg->length);
2086 }
2087
2088 start_addr = iova->pfn_lo << PAGE_SHIFT;
2089
2090 /* clear the ptes covering the whole mapped range */
2091 dma_pte_clear_range(domain, start_addr, start_addr + size);
2092 /* free page tables */
2093 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2094
2095 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2096 size >> VTD_PAGE_SHIFT, 0))
2097 iommu_flush_write_buffer(domain->iommu);
2098
2099 /* free iova */
2100 __free_iova(&domain->iovad, iova);
2101 }
2102
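/*
 * Identity path for devices that bypass translation
 * (DUMMY_DEVICE_DOMAIN_INFO): bus address == physical address for every
 * scatterlist element.
 */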
2103 static int intel_nontranslate_map_sg(struct device *hwdev,
2104 struct scatterlist *sglist, int nelems, int dir)
2105 {
2106 int i;
2107 struct scatterlist *sg;
2108
2109 for_each_sg(sglist, sg, nelems, i) {
2110 BUG_ON(!sg_page(sg));
2111 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2112 sg->dma_length = sg->length;
2113 }
2114 return nelems;
2115 }
2116
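/*
 * Map a scatterlist into one contiguous IOVA range: sum the aligned sizes,
 * allocate a single IOVA block, map each element at its running offset and
 * finally flush the IOTLB for the new (non-present to present) mapping.
 */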
2117 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2118 int dir)
2119 {
2120 void *addr;
2121 int i;
2122 struct pci_dev *pdev = to_pci_dev(hwdev);
2123 struct dmar_domain *domain;
2124 size_t size = 0;
2125 int prot = 0;
2126 size_t offset = 0;
2127 struct iova *iova = NULL;
2128 int ret;
2129 struct scatterlist *sg;
2130 unsigned long start_addr;
2131
2132 BUG_ON(dir == DMA_NONE);
2133 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2134 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2135
2136 domain = get_valid_domain_for_dev(pdev);
2137 if (!domain)
2138 return 0;
2139
2140 for_each_sg(sglist, sg, nelems, i) {
2141 addr = SG_ENT_VIRT_ADDRESS(sg);
2142 addr = (void *)virt_to_phys(addr);
2143 size += aligned_size((u64)addr, sg->length);
2144 }
2145
2146 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2147 if (!iova) {
2148 sglist->dma_length = 0;
2149 return 0;
2150 }
2151
2152 /*
2153 * Check if DMAR supports zero-length reads on write only
2154 * mappings..
2155 */
2156 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2157 !cap_zlr(domain->iommu->cap))
2158 prot |= DMA_PTE_READ;
2159 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2160 prot |= DMA_PTE_WRITE;
2161
2162 start_addr = iova->pfn_lo << PAGE_SHIFT;
2163 offset = 0;
2164 for_each_sg(sglist, sg, nelems, i) {
2165 addr = SG_ENT_VIRT_ADDRESS(sg);
2166 addr = (void *)virt_to_phys(addr);
2167 size = aligned_size((u64)addr, sg->length);
2168 ret = domain_page_mapping(domain, start_addr + offset,
2169 ((u64)addr) & PAGE_MASK,
2170 size, prot);
2171 if (ret) {
2172 /* clear the page */
2173 dma_pte_clear_range(domain, start_addr,
2174 start_addr + offset);
2175 /* free page tables */
2176 dma_pte_free_pagetable(domain, start_addr,
2177 start_addr + offset);
2178 /* free iova */
2179 __free_iova(&domain->iovad, iova);
2180 return 0;
2181 }
2182 sg->dma_address = start_addr + offset +
2183 ((u64)addr & (~PAGE_MASK));
2184 sg->dma_length = sg->length;
2185 offset += size;
2186 }
2187
2188 /* it's a non-present to present mapping */
2189 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2190 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2191 iommu_flush_write_buffer(domain->iommu);
2192 return nelems;
2193 }
2194
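/*
 * Operations published to the generic DMA API once dma_ops is switched
 * over in intel_iommu_init().  Drivers normally reach these through the
 * dma_* wrappers rather than calling them directly, e.g. (hypothetical
 * driver snippet):
 *
 *	dma_addr_t dma = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *	...
 *	dma_unmap_single(&pdev->dev, dma, len, DMA_TO_DEVICE);
 */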
2195 static struct dma_mapping_ops intel_dma_ops = {
2196 .alloc_coherent = intel_alloc_coherent,
2197 .free_coherent = intel_free_coherent,
2198 .map_single = intel_map_single,
2199 .unmap_single = intel_unmap_single,
2200 .map_sg = intel_map_sg,
2201 .unmap_sg = intel_unmap_sg,
2202 };
2203
2204 static inline int iommu_domain_cache_init(void)
2205 {
2206 int ret = 0;
2207
2208 iommu_domain_cache = kmem_cache_create("iommu_domain",
2209 sizeof(struct dmar_domain),
2210 0,
2211 SLAB_HWCACHE_ALIGN,
2212
2213 NULL);
2214 if (!iommu_domain_cache) {
2215 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2216 ret = -ENOMEM;
2217 }
2218
2219 return ret;
2220 }
2221
2222 static inline int iommu_devinfo_cache_init(void)
2223 {
2224 int ret = 0;
2225
2226 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2227 sizeof(struct device_domain_info),
2228 0,
2229 SLAB_HWCACHE_ALIGN,
2230 NULL);
2231 if (!iommu_devinfo_cache) {
2232 printk(KERN_ERR "Couldn't create devinfo cache\n");
2233 ret = -ENOMEM;
2234 }
2235
2236 return ret;
2237 }
2238
2239 static inline int iommu_iova_cache_init(void)
2240 {
2241 int ret = 0;
2242
2243 iommu_iova_cache = kmem_cache_create("iommu_iova",
2244 sizeof(struct iova),
2245 0,
2246 SLAB_HWCACHE_ALIGN,
2247 NULL);
2248 if (!iommu_iova_cache) {
2249 printk(KERN_ERR "Couldn't create iova cache\n");
2250 ret = -ENOMEM;
2251 }
2252
2253 return ret;
2254 }
2255
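/* Create the iova, domain and devinfo slab caches; unwind on failure. */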
2256 static int __init iommu_init_mempool(void)
2257 {
2258 int ret;
2259 ret = iommu_iova_cache_init();
2260 if (ret)
2261 return ret;
2262
2263 ret = iommu_domain_cache_init();
2264 if (ret)
2265 goto domain_error;
2266
2267 ret = iommu_devinfo_cache_init();
2268 if (!ret)
2269 return ret;
2270
2271 kmem_cache_destroy(iommu_domain_cache);
2272 domain_error:
2273 kmem_cache_destroy(iommu_iova_cache);
2274
2275 return -ENOMEM;
2276 }
2277
2278 static void __init iommu_exit_mempool(void)
2279 {
2280 kmem_cache_destroy(iommu_devinfo_cache);
2281 kmem_cache_destroy(iommu_domain_cache);
2282 kmem_cache_destroy(iommu_iova_cache);
2283
2284 }
2285
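/*
 * Mark DMAR units that need no translation: units with no PCI devices
 * behind them, and (unless dmar_map_gfx is set) units serving only
 * graphics devices, whose devices are then flagged to bypass the IOMMU.
 */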
2286 static void __init init_no_remapping_devices(void)
2287 {
2288 struct dmar_drhd_unit *drhd;
2289
2290 for_each_drhd_unit(drhd) {
2291 if (!drhd->include_all) {
2292 int i;
2293 for (i = 0; i < drhd->devices_cnt; i++)
2294 if (drhd->devices[i] != NULL)
2295 break;
2296 /* ignore DMAR unit if no pci devices exist */
2297 if (i == drhd->devices_cnt)
2298 drhd->ignored = 1;
2299 }
2300 }
2301
2302 if (dmar_map_gfx)
2303 return;
2304
2305 for_each_drhd_unit(drhd) {
2306 int i;
2307 if (drhd->ignored || drhd->include_all)
2308 continue;
2309
2310 for (i = 0; i < drhd->devices_cnt; i++)
2311 if (drhd->devices[i] &&
2312 !IS_GFX_DEVICE(drhd->devices[i]))
2313 break;
2314
2315 if (i < drhd->devices_cnt)
2316 continue;
2317
2318 /* bypass IOMMU if it is just for gfx devices */
2319 drhd->ignored = 1;
2320 for (i = 0; i < drhd->devices_cnt; i++) {
2321 if (!drhd->devices[i])
2322 continue;
2323 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2324 }
2325 }
2326 }
2327
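/*
 * Main init entry point: parse the DMAR table and device scopes, set up
 * the hardware via init_dmars(), then install intel_dma_ops as dma_ops.
 */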
2328 int __init intel_iommu_init(void)
2329 {
2330 int ret = 0;
2331
2332 if (dmar_table_init())
2333 return -ENODEV;
2334
2335 if (dmar_dev_scope_init())
2336 return -ENODEV;
2337
2338 /*
2339 * Check whether DMA remapping should actually be set up here; the table
2340 * and device-scope initialization above is also used by interrupt remapping.
2341 */
2342 if (no_iommu || swiotlb || dmar_disabled)
2343 return -ENODEV;
2344
2345 iommu_init_mempool();
2346 dmar_init_reserved_ranges();
2347
2348 init_no_remapping_devices();
2349
2350 ret = init_dmars();
2351 if (ret) {
2352 printk(KERN_ERR "IOMMU: dmar init failed\n");
2353 put_iova_domain(&reserved_iova_list);
2354 iommu_exit_mempool();
2355 return ret;
2356 }
2357 printk(KERN_INFO
2358 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2359
2360 init_timer(&unmap_timer);
2361 force_iommu = 1;
2362 dma_ops = &intel_dma_ops;
2363 return 0;
2364 }
2365
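/*
 * The exported intel_iommu_* helpers below form the external domain
 * interface (used e.g. by KVM device assignment).
 *
 * intel_iommu_domain_exit() clears and frees a domain's page tables and
 * then releases the domain itself.
 */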
2366 void intel_iommu_domain_exit(struct dmar_domain *domain)
2367 {
2368 u64 end;
2369
2370 /* Domain 0 is reserved, so don't process it */
2371 if (!domain)
2372 return;
2373
2374 end = DOMAIN_MAX_ADDR(domain->gaw);
2375 end = end & (~VTD_PAGE_MASK);
2376
2377 /* clear ptes */
2378 dma_pte_clear_range(domain, 0, end);
2379
2380 /* free page tables */
2381 dma_pte_free_pagetable(domain, 0, end);
2382
2383 iommu_free_domain(domain);
2384 free_domain_mem(domain);
2385 }
2386 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2387
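/*
 * Allocate and initialise (with DEFAULT_DOMAIN_ADDRESS_WIDTH) a fresh
 * domain on the IOMMU whose DRHD unit covers @pdev.
 */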
2388 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2389 {
2390 struct dmar_drhd_unit *drhd;
2391 struct dmar_domain *domain;
2392 struct intel_iommu *iommu;
2393
2394 drhd = dmar_find_matched_drhd_unit(pdev);
2395 if (!drhd) {
2396 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2397 return NULL;
2398 }
2399
2400 iommu = drhd->iommu;
2401 if (!iommu) {
2402 printk(KERN_ERR
2403 "intel_iommu_domain_alloc: iommu == NULL\n");
2404 return NULL;
2405 }
2406 domain = iommu_alloc_domain(iommu);
2407 if (!domain) {
2408 printk(KERN_ERR
2409 "intel_iommu_domain_alloc: domain == NULL\n");
2410 return NULL;
2411 }
2412 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2413 printk(KERN_ERR
2414 "intel_iommu_domain_alloc: domain_init() failed\n");
2415 intel_iommu_domain_exit(domain);
2416 return NULL;
2417 }
2418 return domain;
2419 }
2420 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2421
2422 int intel_iommu_context_mapping(
2423 struct dmar_domain *domain, struct pci_dev *pdev)
2424 {
2425 int rc;
2426 rc = domain_context_mapping(domain, pdev);
2427 return rc;
2428 }
2429 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2430
2431 int intel_iommu_page_mapping(
2432 struct dmar_domain *domain, dma_addr_t iova,
2433 u64 hpa, size_t size, int prot)
2434 {
2435 int rc;
2436 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2437 return rc;
2438 }
2439 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2440
2441 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2442 {
2443 detach_domain_for_dev(domain, bus, devfn);
2444 }
2445 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2446
2447 struct dmar_domain *
2448 intel_iommu_find_domain(struct pci_dev *pdev)
2449 {
2450 return find_domain(pdev);
2451 }
2452 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2453
2454 int intel_iommu_found(void)
2455 {
2456 return g_num_of_iommus;
2457 }
2458 EXPORT_SYMBOL_GPL(intel_iommu_found);
2459
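/*
 * Walk the domain's page table and return the host page frame number
 * backing @iova, or 0 if no translation is present.
 */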
2460 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2461 {
2462 struct dma_pte *pte;
2463 u64 pfn;
2464
2465 pfn = 0;
2466 pte = addr_to_dma_pte(domain, iova);
2467
2468 if (pte)
2469 pfn = dma_pte_addr(*pte);
2470
2471 return pfn >> VTD_PAGE_SHIFT;
2472 }
2473 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);