Commit | Line | Data |
---|---|---|
f31e65e1 BH |
1 | /* |
2 | * This program is free software; you can redistribute it and/or modify | |
3 | * it under the terms of the GNU General Public License, version 2, as | |
4 | * published by the Free Software Foundation. | |
5 | * | |
6 | * This program is distributed in the hope that it will be useful, | |
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
9 | * GNU General Public License for more details. | |
10 | * | |
11 | * You should have received a copy of the GNU General Public License | |
12 | * along with this program; if not, write to the Free Software | |
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
14 | * | |
15 | * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | |
16 | * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> | |
17 | */ | |
18 | ||
19 | #include <linux/types.h> | |
20 | #include <linux/string.h> | |
21 | #include <linux/kvm.h> | |
22 | #include <linux/kvm_host.h> | |
23 | #include <linux/highmem.h> | |
24 | #include <linux/gfp.h> | |
25 | #include <linux/slab.h> | |
26 | #include <linux/hugetlb.h> | |
27 | #include <linux/list.h> | |
28 | #include <linux/anon_inodes.h> | |
29 | ||
30 | #include <asm/tlbflush.h> | |
31 | #include <asm/kvm_ppc.h> | |
32 | #include <asm/kvm_book3s.h> | |
33 | #include <asm/mmu-hash64.h> | |
34 | #include <asm/hvcall.h> | |
35 | #include <asm/synch.h> | |
36 | #include <asm/ppc-opcode.h> | |
37 | #include <asm/kvm_host.h> | |
38 | #include <asm/udbg.h> | |
462ee11e | 39 | #include <asm/iommu.h> |
f31e65e1 BH |
40 | |
41 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | |
42 | ||
f8626985 | 43 | static unsigned long kvmppc_tce_pages(unsigned long window_size) |
f31e65e1 | 44 | { |
462ee11e | 45 | return ALIGN((window_size >> IOMMU_PAGE_SHIFT_4K) |
f31e65e1 BH |
46 | * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; |
47 | } | |
48 | ||
f8626985 AK |
49 | static unsigned long kvmppc_stt_pages(unsigned long tce_pages) |
50 | { | |
51 | unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + | |
52 | (tce_pages * sizeof(struct page *)); | |
53 | ||
54 | return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; | |
55 | } | |
56 | ||
57 | static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) | |
58 | { | |
59 | long ret = 0; | |
60 | ||
61 | if (!current || !current->mm) | |
62 | return ret; /* process exited */ | |
63 | ||
64 | down_write(¤t->mm->mmap_sem); | |
65 | ||
66 | if (inc) { | |
67 | unsigned long locked, lock_limit; | |
68 | ||
69 | locked = current->mm->locked_vm + stt_pages; | |
70 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | |
71 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) | |
72 | ret = -ENOMEM; | |
73 | else | |
74 | current->mm->locked_vm += stt_pages; | |
75 | } else { | |
76 | if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) | |
77 | stt_pages = current->mm->locked_vm; | |
78 | ||
79 | current->mm->locked_vm -= stt_pages; | |
80 | } | |
81 | ||
82 | pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, | |
83 | inc ? '+' : '-', | |
84 | stt_pages << PAGE_SHIFT, | |
85 | current->mm->locked_vm << PAGE_SHIFT, | |
86 | rlimit(RLIMIT_MEMLOCK), | |
87 | ret ? " - exceeded" : ""); | |
88 | ||
89 | up_write(¤t->mm->mmap_sem); | |
90 | ||
91 | return ret; | |
92 | } | |
93 | ||
366baf28 | 94 | static void release_spapr_tce_table(struct rcu_head *head) |
f31e65e1 | 95 | { |
366baf28 AK |
96 | struct kvmppc_spapr_tce_table *stt = container_of(head, |
97 | struct kvmppc_spapr_tce_table, rcu); | |
f31e65e1 | 98 | int i; |
f8626985 | 99 | unsigned long npages = kvmppc_tce_pages(stt->window_size); |
f31e65e1 | 100 | |
f8626985 | 101 | for (i = 0; i < npages; i++) |
f31e65e1 | 102 | __free_page(stt->pages[i]); |
f31e65e1 | 103 | |
366baf28 | 104 | kfree(stt); |
f31e65e1 BH |
105 | } |
106 | ||
107 | static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |
108 | { | |
109 | struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; | |
110 | struct page *page; | |
111 | ||
f8626985 | 112 | if (vmf->pgoff >= kvmppc_tce_pages(stt->window_size)) |
f31e65e1 BH |
113 | return VM_FAULT_SIGBUS; |
114 | ||
115 | page = stt->pages[vmf->pgoff]; | |
116 | get_page(page); | |
117 | vmf->page = page; | |
118 | return 0; | |
119 | } | |
120 | ||
121 | static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { | |
122 | .fault = kvm_spapr_tce_fault, | |
123 | }; | |
124 | ||
125 | static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) | |
126 | { | |
127 | vma->vm_ops = &kvm_spapr_tce_vm_ops; | |
128 | return 0; | |
129 | } | |
130 | ||
131 | static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) | |
132 | { | |
133 | struct kvmppc_spapr_tce_table *stt = filp->private_data; | |
134 | ||
366baf28 AK |
135 | list_del_rcu(&stt->list); |
136 | ||
137 | kvm_put_kvm(stt->kvm); | |
138 | ||
f8626985 AK |
139 | kvmppc_account_memlimit( |
140 | kvmppc_stt_pages(kvmppc_tce_pages(stt->window_size)), false); | |
366baf28 AK |
141 | call_rcu(&stt->rcu, release_spapr_tce_table); |
142 | ||
f31e65e1 BH |
143 | return 0; |
144 | } | |
145 | ||
75ef9de1 | 146 | static const struct file_operations kvm_spapr_tce_fops = { |
f31e65e1 BH |
147 | .mmap = kvm_spapr_tce_mmap, |
148 | .release = kvm_spapr_tce_release, | |
149 | }; | |
150 | ||
151 | long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |
152 | struct kvm_create_spapr_tce *args) | |
153 | { | |
154 | struct kvmppc_spapr_tce_table *stt = NULL; | |
f8626985 | 155 | unsigned long npages; |
f31e65e1 BH |
156 | int ret = -ENOMEM; |
157 | int i; | |
158 | ||
159 | /* Check this LIOBN hasn't been previously allocated */ | |
160 | list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { | |
161 | if (stt->liobn == args->liobn) | |
162 | return -EBUSY; | |
163 | } | |
164 | ||
f8626985 AK |
165 | npages = kvmppc_tce_pages(args->window_size); |
166 | ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); | |
167 | if (ret) { | |
168 | stt = NULL; | |
169 | goto fail; | |
170 | } | |
f31e65e1 BH |
171 | |
172 | stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), | |
173 | GFP_KERNEL); | |
174 | if (!stt) | |
175 | goto fail; | |
176 | ||
177 | stt->liobn = args->liobn; | |
178 | stt->window_size = args->window_size; | |
179 | stt->kvm = kvm; | |
180 | ||
181 | for (i = 0; i < npages; i++) { | |
182 | stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); | |
183 | if (!stt->pages[i]) | |
184 | goto fail; | |
185 | } | |
186 | ||
187 | kvm_get_kvm(kvm); | |
188 | ||
189 | mutex_lock(&kvm->lock); | |
366baf28 | 190 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); |
f31e65e1 BH |
191 | |
192 | mutex_unlock(&kvm->lock); | |
193 | ||
194 | return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | |
2f84d5ea | 195 | stt, O_RDWR | O_CLOEXEC); |
f31e65e1 BH |
196 | |
197 | fail: | |
198 | if (stt) { | |
199 | for (i = 0; i < npages; i++) | |
200 | if (stt->pages[i]) | |
201 | __free_page(stt->pages[i]); | |
202 | ||
203 | kfree(stt); | |
204 | } | |
205 | return ret; | |
206 | } |