Commit | Line | Data |
---|---|---|
f31e65e1 BH |
1 | /* |
2 | * This program is free software; you can redistribute it and/or modify | |
3 | * it under the terms of the GNU General Public License, version 2, as | |
4 | * published by the Free Software Foundation. | |
5 | * | |
6 | * This program is distributed in the hope that it will be useful, | |
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
9 | * GNU General Public License for more details. | |
10 | * | |
11 | * You should have received a copy of the GNU General Public License | |
12 | * along with this program; if not, write to the Free Software | |
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
14 | * | |
15 | * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | |
16 | * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> | |
d3695aa4 | 17 | * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> |
f31e65e1 BH |
18 | */ |
19 | ||
20 | #include <linux/types.h> | |
21 | #include <linux/string.h> | |
22 | #include <linux/kvm.h> | |
23 | #include <linux/kvm_host.h> | |
24 | #include <linux/highmem.h> | |
25 | #include <linux/gfp.h> | |
26 | #include <linux/slab.h> | |
27 | #include <linux/hugetlb.h> | |
28 | #include <linux/list.h> | |
29 | #include <linux/anon_inodes.h> | |
30 | ||
31 | #include <asm/tlbflush.h> | |
32 | #include <asm/kvm_ppc.h> | |
33 | #include <asm/kvm_book3s.h> | |
34 | #include <asm/mmu-hash64.h> | |
35 | #include <asm/hvcall.h> | |
36 | #include <asm/synch.h> | |
37 | #include <asm/ppc-opcode.h> | |
38 | #include <asm/kvm_host.h> | |
39 | #include <asm/udbg.h> | |
462ee11e | 40 | #include <asm/iommu.h> |
d3695aa4 | 41 | #include <asm/tce.h> |
f31e65e1 | 42 | |
f8626985 | 43 | static unsigned long kvmppc_tce_pages(unsigned long window_size) |
f31e65e1 | 44 | { |
462ee11e | 45 | return ALIGN((window_size >> IOMMU_PAGE_SHIFT_4K) |
f31e65e1 BH |
46 | * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; |
47 | } | |
48 | ||
f8626985 AK |
49 | static unsigned long kvmppc_stt_pages(unsigned long tce_pages) |
50 | { | |
51 | unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + | |
52 | (tce_pages * sizeof(struct page *)); | |
53 | ||
54 | return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; | |
55 | } | |
56 | ||
57 | static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) | |
58 | { | |
59 | long ret = 0; | |
60 | ||
61 | if (!current || !current->mm) | |
62 | return ret; /* process exited */ | |
63 | ||
64 | down_write(¤t->mm->mmap_sem); | |
65 | ||
66 | if (inc) { | |
67 | unsigned long locked, lock_limit; | |
68 | ||
69 | locked = current->mm->locked_vm + stt_pages; | |
70 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | |
71 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) | |
72 | ret = -ENOMEM; | |
73 | else | |
74 | current->mm->locked_vm += stt_pages; | |
75 | } else { | |
76 | if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) | |
77 | stt_pages = current->mm->locked_vm; | |
78 | ||
79 | current->mm->locked_vm -= stt_pages; | |
80 | } | |
81 | ||
82 | pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, | |
83 | inc ? '+' : '-', | |
84 | stt_pages << PAGE_SHIFT, | |
85 | current->mm->locked_vm << PAGE_SHIFT, | |
86 | rlimit(RLIMIT_MEMLOCK), | |
87 | ret ? " - exceeded" : ""); | |
88 | ||
89 | up_write(¤t->mm->mmap_sem); | |
90 | ||
91 | return ret; | |
92 | } | |
93 | ||
366baf28 | 94 | static void release_spapr_tce_table(struct rcu_head *head) |
f31e65e1 | 95 | { |
366baf28 AK |
96 | struct kvmppc_spapr_tce_table *stt = container_of(head, |
97 | struct kvmppc_spapr_tce_table, rcu); | |
f31e65e1 | 98 | int i; |
f8626985 | 99 | unsigned long npages = kvmppc_tce_pages(stt->window_size); |
f31e65e1 | 100 | |
f8626985 | 101 | for (i = 0; i < npages; i++) |
f31e65e1 | 102 | __free_page(stt->pages[i]); |
f31e65e1 | 103 | |
366baf28 | 104 | kfree(stt); |
f31e65e1 BH |
105 | } |
106 | ||
107 | static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |
108 | { | |
109 | struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; | |
110 | struct page *page; | |
111 | ||
f8626985 | 112 | if (vmf->pgoff >= kvmppc_tce_pages(stt->window_size)) |
f31e65e1 BH |
113 | return VM_FAULT_SIGBUS; |
114 | ||
115 | page = stt->pages[vmf->pgoff]; | |
116 | get_page(page); | |
117 | vmf->page = page; | |
118 | return 0; | |
119 | } | |
120 | ||
121 | static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { | |
122 | .fault = kvm_spapr_tce_fault, | |
123 | }; | |
124 | ||
125 | static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) | |
126 | { | |
127 | vma->vm_ops = &kvm_spapr_tce_vm_ops; | |
128 | return 0; | |
129 | } | |
130 | ||
131 | static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) | |
132 | { | |
133 | struct kvmppc_spapr_tce_table *stt = filp->private_data; | |
134 | ||
366baf28 AK |
135 | list_del_rcu(&stt->list); |
136 | ||
137 | kvm_put_kvm(stt->kvm); | |
138 | ||
f8626985 AK |
139 | kvmppc_account_memlimit( |
140 | kvmppc_stt_pages(kvmppc_tce_pages(stt->window_size)), false); | |
366baf28 AK |
141 | call_rcu(&stt->rcu, release_spapr_tce_table); |
142 | ||
f31e65e1 BH |
143 | return 0; |
144 | } | |
145 | ||
75ef9de1 | 146 | static const struct file_operations kvm_spapr_tce_fops = { |
f31e65e1 BH |
147 | .mmap = kvm_spapr_tce_mmap, |
148 | .release = kvm_spapr_tce_release, | |
149 | }; | |
150 | ||
151 | long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |
152 | struct kvm_create_spapr_tce *args) | |
153 | { | |
154 | struct kvmppc_spapr_tce_table *stt = NULL; | |
f8626985 | 155 | unsigned long npages; |
f31e65e1 BH |
156 | int ret = -ENOMEM; |
157 | int i; | |
158 | ||
159 | /* Check this LIOBN hasn't been previously allocated */ | |
160 | list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { | |
161 | if (stt->liobn == args->liobn) | |
162 | return -EBUSY; | |
163 | } | |
164 | ||
f8626985 AK |
165 | npages = kvmppc_tce_pages(args->window_size); |
166 | ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); | |
167 | if (ret) { | |
168 | stt = NULL; | |
169 | goto fail; | |
170 | } | |
f31e65e1 BH |
171 | |
172 | stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), | |
173 | GFP_KERNEL); | |
174 | if (!stt) | |
175 | goto fail; | |
176 | ||
177 | stt->liobn = args->liobn; | |
178 | stt->window_size = args->window_size; | |
179 | stt->kvm = kvm; | |
180 | ||
181 | for (i = 0; i < npages; i++) { | |
182 | stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); | |
183 | if (!stt->pages[i]) | |
184 | goto fail; | |
185 | } | |
186 | ||
187 | kvm_get_kvm(kvm); | |
188 | ||
189 | mutex_lock(&kvm->lock); | |
366baf28 | 190 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); |
f31e65e1 BH |
191 | |
192 | mutex_unlock(&kvm->lock); | |
193 | ||
194 | return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | |
2f84d5ea | 195 | stt, O_RDWR | O_CLOEXEC); |
f31e65e1 BH |
196 | |
197 | fail: | |
198 | if (stt) { | |
199 | for (i = 0; i < npages; i++) | |
200 | if (stt->pages[i]) | |
201 | __free_page(stt->pages[i]); | |
202 | ||
203 | kfree(stt); | |
204 | } | |
205 | return ret; | |
206 | } | |
d3695aa4 AK |
207 | |
208 | long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |
209 | unsigned long liobn, unsigned long ioba, | |
210 | unsigned long tce_list, unsigned long npages) | |
211 | { | |
212 | struct kvmppc_spapr_tce_table *stt; | |
213 | long i, ret = H_SUCCESS, idx; | |
214 | unsigned long entry, ua = 0; | |
215 | u64 __user *tces, tce; | |
216 | ||
217 | stt = kvmppc_find_table(vcpu, liobn); | |
218 | if (!stt) | |
219 | return H_TOO_HARD; | |
220 | ||
221 | entry = ioba >> IOMMU_PAGE_SHIFT_4K; | |
222 | /* | |
223 | * SPAPR spec says that the maximum size of the list is 512 TCEs | |
224 | * so the whole table fits in 4K page | |
225 | */ | |
226 | if (npages > 512) | |
227 | return H_PARAMETER; | |
228 | ||
229 | if (tce_list & (SZ_4K - 1)) | |
230 | return H_PARAMETER; | |
231 | ||
232 | ret = kvmppc_ioba_validate(stt, ioba, npages); | |
233 | if (ret != H_SUCCESS) | |
234 | return ret; | |
235 | ||
236 | idx = srcu_read_lock(&vcpu->kvm->srcu); | |
237 | if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { | |
238 | ret = H_TOO_HARD; | |
239 | goto unlock_exit; | |
240 | } | |
241 | tces = (u64 __user *) ua; | |
242 | ||
243 | for (i = 0; i < npages; ++i) { | |
244 | if (get_user(tce, tces + i)) { | |
245 | ret = H_TOO_HARD; | |
246 | goto unlock_exit; | |
247 | } | |
248 | tce = be64_to_cpu(tce); | |
249 | ||
250 | ret = kvmppc_tce_validate(stt, tce); | |
251 | if (ret != H_SUCCESS) | |
252 | goto unlock_exit; | |
253 | ||
254 | kvmppc_tce_put(stt, entry + i, tce); | |
255 | } | |
256 | ||
257 | unlock_exit: | |
258 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | |
259 | ||
260 | return ret; | |
261 | } | |
262 | EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); |