Commit | Line | Data |
---|---|---|
c048fdfe GC |
1 | #include <linux/init.h> |
2 | ||
3 | #include <linux/mm.h> | |
c048fdfe GC |
4 | #include <linux/spinlock.h> |
5 | #include <linux/smp.h> | |
c048fdfe | 6 | #include <linux/interrupt.h> |
6dd01bed | 7 | #include <linux/module.h> |
93296720 | 8 | #include <linux/cpu.h> |
c048fdfe | 9 | |
c048fdfe | 10 | #include <asm/tlbflush.h> |
c048fdfe | 11 | #include <asm/mmu_context.h> |
350f8f56 | 12 | #include <asm/cache.h> |
6dd01bed | 13 | #include <asm/apic.h> |
bdbcdd48 | 14 | #include <asm/uv/uv.h> |
3df3212f | 15 | #include <linux/debugfs.h> |
5af5573e | 16 | |
9eb912d1 BG |
17 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) |
18 | = { &init_mm, 0, }; | |
19 | ||
c048fdfe GC |
20 | /* |
21 | * Smarter SMP flushing macros. | |
22 | * c/o Linus Torvalds. | |
23 | * | |
24 | * These mean you can really definitely utterly forget about | |
25 | * writing to user space from interrupts. (It's not allowed anyway). | |
26 | * | |
27 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | |
28 | * | |
29 | * More scalable flush, from Andi Kleen | |
30 | * | |
52aec330 | 31 | * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi |
c048fdfe GC |
32 | */ |
33 | ||
52aec330 AS |
/*
 * Argument block handed to the remote flush callbacks.
 *
 * @flush_mm:    address space being flushed; flush_tlb_func() ignores the
 *               request when this is not the receiving CPU's active_mm.
 * @flush_start: first virtual address to invalidate.
 * @flush_end:   TLB_FLUSH_ALL requests a full flush, 0 requests a single
 *               page at @flush_start, any other value is the exclusive end
 *               of a range walked in PAGE_SIZE steps.
 */
struct flush_tlb_info {
	struct mm_struct *flush_mm;
	unsigned long flush_start;
	unsigned long flush_end;
};
93296720 | 39 | |
c048fdfe GC |
40 | /* |
41 | * We cannot call mmdrop() because we are in interrupt context, | |
42 | * instead update mm->cpu_vm_mask. | |
43 | */ | |
44 | void leave_mm(int cpu) | |
45 | { | |
02171b4a | 46 | struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm); |
c6ae41e7 | 47 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) |
c048fdfe | 48 | BUG(); |
a6fca40f SS |
49 | if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { |
50 | cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); | |
51 | load_cr3(swapper_pg_dir); | |
52 | } | |
c048fdfe GC |
53 | } |
54 | EXPORT_SYMBOL_GPL(leave_mm); | |
55 | ||
56 | /* | |
c048fdfe GC |
57 | * The flush IPI assumes that a thread switch happens in this order: |
58 | * [cpu0: the cpu that switches] | |
59 | * 1) switch_mm() either 1a) or 1b) | |
60 | * 1a) thread switch to a different mm | |
52aec330 AS |
61 | * 1a1) set cpu_tlbstate to TLBSTATE_OK |
62 | * Now the tlb flush IPI handler flush_tlb_func won't call leave_mm | |
63 | * if cpu0 was in lazy tlb mode. | |
64 | * 1a2) update cpu active_mm | |
c048fdfe | 65 | * Now cpu0 accepts tlb flushes for the new mm. |
52aec330 | 66 | * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask); |
c048fdfe GC |
67 | * Now the other cpus will send tlb flush ipis. |
68 | * 1a4) change cr3. | |
52aec330 AS |
69 | * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask); |
70 | * Stop ipi delivery for the old mm. This is not synchronized with | |
71 | * the other cpus, but flush_tlb_func ignores flush ipis for the wrong | |
72 | * mm, and in the worst case we perform a superfluous tlb flush. | |
c048fdfe | 73 | * 1b) thread switch without mm change |
52aec330 AS |
74 | * cpu active_mm is correct, cpu0 already handles flush ipis. |
75 | * 1b1) set cpu_tlbstate to TLBSTATE_OK | |
c048fdfe GC |
76 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. |
77 | * Atomically set the bit [other cpus will start sending flush ipis], | |
78 | * and test the bit. | |
79 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | |
80 | * 2) switch %%esp, ie current | |
81 | * | |
82 | * The interrupt must handle 2 special cases: | |
83 | * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. | |
84 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | |
85 | * runs in kernel space, the cpu could load tlb entries for user space | |
86 | * pages. | |
87 | * | |
52aec330 | 88 | * The good news is that cpu_tlbstate is local to each cpu, no |
c048fdfe GC |
89 | * write/read ordering problems. |
90 | */ | |
91 | ||
92 | /* | |
52aec330 | 93 | * TLB flush funcation: |
c048fdfe GC |
94 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. |
95 | * 2) Leave the mm if we are in the lazy tlb mode. | |
02cf94c3 | 96 | */ |
52aec330 | 97 | static void flush_tlb_func(void *info) |
c048fdfe | 98 | { |
52aec330 | 99 | struct flush_tlb_info *f = info; |
c048fdfe | 100 | |
fd0f5869 TS |
101 | inc_irq_stat(irq_tlb_count); |
102 | ||
52aec330 AS |
103 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) |
104 | return; | |
c048fdfe | 105 | |
9824cf97 | 106 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); |
52aec330 | 107 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { |
094ab1db | 108 | if (f->flush_end == TLB_FLUSH_ALL) |
52aec330 AS |
109 | local_flush_tlb(); |
110 | else if (!f->flush_end) | |
111 | __flush_tlb_single(f->flush_start); | |
112 | else { | |
113 | unsigned long addr; | |
114 | addr = f->flush_start; | |
115 | while (addr < f->flush_end) { | |
116 | __flush_tlb_single(addr); | |
117 | addr += PAGE_SIZE; | |
e7b52ffd | 118 | } |
52aec330 AS |
119 | } |
120 | } else | |
121 | leave_mm(smp_processor_id()); | |
c048fdfe | 122 | |
c048fdfe GC |
123 | } |
124 | ||
4595f962 | 125 | void native_flush_tlb_others(const struct cpumask *cpumask, |
e7b52ffd AS |
126 | struct mm_struct *mm, unsigned long start, |
127 | unsigned long end) | |
4595f962 | 128 | { |
52aec330 AS |
129 | struct flush_tlb_info info; |
130 | info.flush_mm = mm; | |
131 | info.flush_start = start; | |
132 | info.flush_end = end; | |
133 | ||
9824cf97 | 134 | count_vm_event(NR_TLB_REMOTE_FLUSH); |
4595f962 | 135 | if (is_uv_system()) { |
bdbcdd48 | 136 | unsigned int cpu; |
0e21990a | 137 | |
25542c64 | 138 | cpu = smp_processor_id(); |
e7b52ffd | 139 | cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu); |
bdbcdd48 | 140 | if (cpumask) |
52aec330 AS |
141 | smp_call_function_many(cpumask, flush_tlb_func, |
142 | &info, 1); | |
0e21990a | 143 | return; |
4595f962 | 144 | } |
52aec330 | 145 | smp_call_function_many(cpumask, flush_tlb_func, &info, 1); |
c048fdfe | 146 | } |
c048fdfe GC |
147 | |
148 | void flush_tlb_current_task(void) | |
149 | { | |
150 | struct mm_struct *mm = current->mm; | |
c048fdfe GC |
151 | |
152 | preempt_disable(); | |
c048fdfe | 153 | |
9824cf97 | 154 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); |
c048fdfe | 155 | local_flush_tlb(); |
78f1c4d6 | 156 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
e7b52ffd | 157 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); |
c048fdfe GC |
158 | preempt_enable(); |
159 | } | |
160 | ||
611ae8e3 AS |
161 | /* |
162 | * It can find out the THP large page, or | |
163 | * HUGETLB page in tlb_flush when THP disabled | |
164 | */ | |
d8dfe60d AS |
165 | static inline unsigned long has_large_page(struct mm_struct *mm, |
166 | unsigned long start, unsigned long end) | |
167 | { | |
168 | pgd_t *pgd; | |
169 | pud_t *pud; | |
170 | pmd_t *pmd; | |
171 | unsigned long addr = ALIGN(start, HPAGE_SIZE); | |
172 | for (; addr < end; addr += HPAGE_SIZE) { | |
173 | pgd = pgd_offset(mm, addr); | |
174 | if (likely(!pgd_none(*pgd))) { | |
175 | pud = pud_offset(pgd, addr); | |
176 | if (likely(!pud_none(*pud))) { | |
177 | pmd = pmd_offset(pud, addr); | |
178 | if (likely(!pmd_none(*pmd))) | |
179 | if (pmd_large(*pmd)) | |
180 | return addr; | |
181 | } | |
182 | } | |
183 | } | |
184 | return 0; | |
185 | } | |
e7b52ffd | 186 | |
611ae8e3 AS |
187 | void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, |
188 | unsigned long end, unsigned long vmflag) | |
189 | { | |
190 | unsigned long addr; | |
191 | unsigned act_entries, tlb_entries = 0; | |
e7b52ffd AS |
192 | |
193 | preempt_disable(); | |
611ae8e3 AS |
194 | if (current->active_mm != mm) |
195 | goto flush_all; | |
e7b52ffd | 196 | |
611ae8e3 AS |
197 | if (!current->mm) { |
198 | leave_mm(smp_processor_id()); | |
199 | goto flush_all; | |
200 | } | |
c048fdfe | 201 | |
611ae8e3 | 202 | if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 |
ddd32b42 | 203 | || vmflag & VM_HUGETLB) { |
611ae8e3 AS |
204 | local_flush_tlb(); |
205 | goto flush_all; | |
206 | } | |
e7b52ffd | 207 | |
611ae8e3 AS |
208 | /* In modern CPU, last level tlb used for both data/ins */ |
209 | if (vmflag & VM_EXEC) | |
210 | tlb_entries = tlb_lli_4k[ENTRIES]; | |
211 | else | |
212 | tlb_entries = tlb_lld_4k[ENTRIES]; | |
213 | /* Assume all of TLB entries was occupied by this task */ | |
214 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; | |
215 | ||
216 | /* tlb_flushall_shift is on balance point, details in commit log */ | |
9824cf97 DH |
217 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { |
218 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | |
611ae8e3 | 219 | local_flush_tlb(); |
9824cf97 | 220 | } else { |
611ae8e3 AS |
221 | if (has_large_page(mm, start, end)) { |
222 | local_flush_tlb(); | |
223 | goto flush_all; | |
e7b52ffd | 224 | } |
611ae8e3 | 225 | /* flush range by one by one 'invlpg' */ |
9824cf97 DH |
226 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
227 | count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); | |
611ae8e3 | 228 | __flush_tlb_single(addr); |
9824cf97 | 229 | } |
611ae8e3 AS |
230 | |
231 | if (cpumask_any_but(mm_cpumask(mm), | |
232 | smp_processor_id()) < nr_cpu_ids) | |
233 | flush_tlb_others(mm_cpumask(mm), mm, start, end); | |
234 | preempt_enable(); | |
235 | return; | |
e7b52ffd | 236 | } |
611ae8e3 AS |
237 | |
238 | flush_all: | |
e7b52ffd AS |
239 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
240 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); | |
c048fdfe GC |
241 | preempt_enable(); |
242 | } | |
243 | ||
e7b52ffd | 244 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) |
c048fdfe GC |
245 | { |
246 | struct mm_struct *mm = vma->vm_mm; | |
c048fdfe GC |
247 | |
248 | preempt_disable(); | |
c048fdfe GC |
249 | |
250 | if (current->active_mm == mm) { | |
251 | if (current->mm) | |
e7b52ffd | 252 | __flush_tlb_one(start); |
c048fdfe GC |
253 | else |
254 | leave_mm(smp_processor_id()); | |
255 | } | |
256 | ||
78f1c4d6 | 257 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
e7b52ffd | 258 | flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); |
c048fdfe GC |
259 | |
260 | preempt_enable(); | |
261 | } | |
262 | ||
263 | static void do_flush_tlb_all(void *info) | |
264 | { | |
9824cf97 | 265 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); |
c048fdfe | 266 | __flush_tlb_all(); |
c6ae41e7 | 267 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) |
3f8afb77 | 268 | leave_mm(smp_processor_id()); |
c048fdfe GC |
269 | } |
270 | ||
271 | void flush_tlb_all(void) | |
272 | { | |
9824cf97 | 273 | count_vm_event(NR_TLB_REMOTE_FLUSH); |
15c8b6c1 | 274 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
c048fdfe | 275 | } |
3df3212f | 276 | |
effee4b9 AS |
277 | static void do_kernel_range_flush(void *info) |
278 | { | |
279 | struct flush_tlb_info *f = info; | |
280 | unsigned long addr; | |
281 | ||
282 | /* flush range by one by one 'invlpg' */ | |
6df46865 | 283 | for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE) |
effee4b9 AS |
284 | __flush_tlb_single(addr); |
285 | } | |
286 | ||
287 | void flush_tlb_kernel_range(unsigned long start, unsigned long end) | |
288 | { | |
289 | unsigned act_entries; | |
290 | struct flush_tlb_info info; | |
291 | ||
292 | /* In modern CPU, last level tlb used for both data/ins */ | |
293 | act_entries = tlb_lld_4k[ENTRIES]; | |
294 | ||
295 | /* Balance as user space task's flush, a bit conservative */ | |
296 | if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 || | |
297 | (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) | |
298 | ||
299 | on_each_cpu(do_flush_tlb_all, NULL, 1); | |
300 | else { | |
301 | info.flush_start = start; | |
302 | info.flush_end = end; | |
303 | on_each_cpu(do_kernel_range_flush, &info, 1); | |
304 | } | |
305 | } | |
306 | ||
3df3212f AS |
307 | #ifdef CONFIG_DEBUG_TLBFLUSH |
308 | static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf, | |
309 | size_t count, loff_t *ppos) | |
310 | { | |
311 | char buf[32]; | |
312 | unsigned int len; | |
313 | ||
314 | len = sprintf(buf, "%hd\n", tlb_flushall_shift); | |
315 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); | |
316 | } | |
317 | ||
318 | static ssize_t tlbflush_write_file(struct file *file, | |
319 | const char __user *user_buf, size_t count, loff_t *ppos) | |
320 | { | |
321 | char buf[32]; | |
322 | ssize_t len; | |
323 | s8 shift; | |
324 | ||
325 | len = min(count, sizeof(buf) - 1); | |
326 | if (copy_from_user(buf, user_buf, len)) | |
327 | return -EFAULT; | |
328 | ||
329 | buf[len] = '\0'; | |
330 | if (kstrtos8(buf, 0, &shift)) | |
331 | return -EINVAL; | |
332 | ||
d4c9dbc6 | 333 | if (shift < -1 || shift >= BITS_PER_LONG) |
3df3212f AS |
334 | return -EINVAL; |
335 | ||
336 | tlb_flushall_shift = shift; | |
337 | return count; | |
338 | } | |
339 | ||
340 | static const struct file_operations fops_tlbflush = { | |
341 | .read = tlbflush_read_file, | |
342 | .write = tlbflush_write_file, | |
343 | .llseek = default_llseek, | |
344 | }; | |
345 | ||
9611dc7a | 346 | static int __init create_tlb_flushall_shift(void) |
3df3212f | 347 | { |
094ab1db PA |
348 | debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, |
349 | arch_debugfs_dir, NULL, &fops_tlbflush); | |
3df3212f AS |
350 | return 0; |
351 | } | |
352 | late_initcall(create_tlb_flushall_shift); | |
353 | #endif |