Commit | Line | Data |
---|---|---|
867e359b CM |
1 | /* |
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation, version 2. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | |
11 | * NON INFRINGEMENT. See the GNU General Public License for | |
12 | * more details. | |
13 | */ | |
14 | ||
15 | #include <linux/string.h> | |
16 | #include <linux/smp.h> | |
17 | #include <linux/module.h> | |
18 | #include <linux/uaccess.h> | |
19 | #include <asm/fixmap.h> | |
20 | #include <asm/kmap_types.h> | |
21 | #include <asm/tlbflush.h> | |
22 | #include <hv/hypervisor.h> | |
23 | #include <arch/chip.h> | |
24 | ||
25 | ||
26 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() | |
27 | ||
28 | /* Defined in memcpy.S */ | |
29 | extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n); | |
30 | extern unsigned long __copy_to_user_inatomic_asm( | |
31 | void __user *to, const void *from, unsigned long n); | |
32 | extern unsigned long __copy_from_user_inatomic_asm( | |
33 | void *to, const void __user *from, unsigned long n); | |
34 | extern unsigned long __copy_from_user_zeroing_asm( | |
35 | void *to, const void __user *from, unsigned long n); | |
36 | ||
37 | typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); | |
38 | ||
39 | /* Size above which to consider TLB games for performance */ | |
40 | #define LARGE_COPY_CUTOFF 2048 | |
41 | ||
42 | /* Tell the simulator when we start (b = 1) and stop (b = 0) deliberately caching memory incoherently, so it does not report spurious coherency warnings. */ | |
43 | #define sim_allow_multiple_caching(b) \ | |
44 | __insn_mtspr(SPR_SIM_CONTROL, \ | |
45 | SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS)) | |
46 | ||
47 | /* | |
48 | * Copy memory by briefly enabling incoherent cacheline-at-a-time mode. | |
49 | * | |
50 | * We set up our own source and destination PTEs that we fully control. | |
51 | * This is the only way to guarantee that we don't race with another | |
52 | * thread that is modifying the PTE; we can't afford to try the | |
53 | * copy_{to,from}_user() technique of catching the interrupt, since | |
54 | * we must run with interrupts disabled to avoid the risk of some | |
55 | * other code seeing the incoherent data in our cache. (Recall that | |
56 | * our cache is indexed by PA, so even if the other code doesn't use | |
38a6f426 | 57 | * our kmap_atomic virtual addresses, they'll still hit in cache using |
867e359b CM |
58 | * the normal VAs that aren't supposed to hit in cache.) |
59 | */ | |
60 | static void memcpy_multicache(void *dest, const void *source, | |
61 | pte_t dst_pte, pte_t src_pte, int len) | |
62 | { | |
0707ad30 CM |
63 | int idx; |
64 | unsigned long flags, newsrc, newdst; | |
867e359b CM |
65 | pmd_t *pmdp; |
66 | pte_t *ptep; | |
38a6f426 | 67 | int type0, type1; |
867e359b CM |
68 | int cpu = get_cpu(); |
69 | ||
70 | /* | |
71 | * Disable interrupts so that we don't recurse into memcpy() | |
72 | * in an interrupt handler, nor accidentally reference | |
73 | * the PA of the source from an interrupt routine. Also | |
74 | * notify the simulator that we're playing games so we don't | |
75 | * generate spurious coherency warnings. | |
76 | */ | |
77 | local_irq_save(flags); | |
78 | sim_allow_multiple_caching(1); | |
79 | ||
80 | /* Set up the new dest mapping */ | |
38a6f426 CM |
81 | type0 = kmap_atomic_idx_push(); |
82 | idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; | |
867e359b CM |
83 | newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); |
84 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); | |
85 | ptep = pte_offset_kernel(pmdp, newdst); | |
86 | if (pte_val(*ptep) != pte_val(dst_pte)) { | |
87 | set_pte(ptep, dst_pte); | |
88 | local_flush_tlb_page(NULL, newdst, PAGE_SIZE); | |
89 | } | |
90 | ||
91 | /* Set up the new source mapping */ | |
38a6f426 CM |
92 | type1 = kmap_atomic_idx_push(); |
93 | idx += (type0 - type1); | |
867e359b CM |
94 | src_pte = hv_pte_set_nc(src_pte); |
95 | src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ | |
96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); | |
97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); | |
98 | ptep = pte_offset_kernel(pmdp, newsrc); | |
76c567fb | 99 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
867e359b CM |
100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
101 | ||
102 | /* Actually move the data. */ | |
103 | __memcpy_asm((void *)newdst, (const void *)newsrc, len); | |
104 | ||
105 | /* | |
106 | * Remap the source as locally-cached and not OLOC'ed so that | |
107 | * we can inval without also invaling the remote cpu's cache. | |
108 | * This also avoids known errata with inv'ing cacheable oloc data. | |
109 | */ | |
110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); | |
111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ | |
76c567fb | 112 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
867e359b CM |
113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
114 | ||
115 | /* | |
116 | * Do the actual invalidation, covering the full L2 cache line | |
117 | * at the end since __memcpy_asm() is somewhat aggressive. | |
118 | */ | |
119 | __inv_buffer((void *)newsrc, len); | |
120 | ||
121 | /* | |
122 | * We're done: notify the simulator that all is back to normal, | |
123 | * and re-enable interrupts and pre-emption. | |
124 | */ | |
38a6f426 CM |
125 | kmap_atomic_idx_pop(); |
126 | kmap_atomic_idx_pop(); | |
867e359b CM |
127 | sim_allow_multiple_caching(0); |
128 | local_irq_restore(flags); | |
0707ad30 | 129 | put_cpu(); |
867e359b CM |
130 | } |
131 | ||
132 | /* | |
133 | * Identify large copies from remotely-cached memory, and copy them | |
134 | * via memcpy_multicache() if they look good, otherwise fall back | |
135 | * to the particular kind of copying passed as the memcpy_t function. | |
136 | */ | |
137 | static unsigned long fast_copy(void *dest, const void *source, int len, | |
138 | memcpy_t func) | |
139 | { | |
140 | /* | |
141 | * Check if it's big enough to bother with. We may end up doing a | |
142 | * small copy via TLB manipulation if we're near a page boundary, | |
143 | * but presumably we'll make it up when we hit the second page. | |
144 | */ | |
145 | while (len >= LARGE_COPY_CUTOFF) { | |
146 | int copy_size, bytes_left_on_page; | |
147 | pte_t *src_ptep, *dst_ptep; | |
148 | pte_t src_pte, dst_pte; | |
149 | struct page *src_page, *dst_page; | |
150 | ||
151 | /* Is the source page oloc'ed to a remote cpu? */ | |
152 | retry_source: | |
153 | src_ptep = virt_to_pte(current->mm, (unsigned long)source); | |
154 | if (src_ptep == NULL) | |
155 | break; | |
156 | src_pte = *src_ptep; | |
157 | if (!hv_pte_get_present(src_pte) || | |
158 | !hv_pte_get_readable(src_pte) || | |
159 | hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) | |
160 | break; | |
161 | if (get_remote_cache_cpu(src_pte) == smp_processor_id()) | |
162 | break; | |
d5d14ed6 | 163 | src_page = pfn_to_page(pte_pfn(src_pte)); |
867e359b CM |
164 | get_page(src_page); |
165 | if (pte_val(src_pte) != pte_val(*src_ptep)) { | |
166 | put_page(src_page); | |
167 | goto retry_source; | |
168 | } | |
169 | if (pte_huge(src_pte)) { | |
170 | /* Adjust the PTE to correspond to a small page */ | |
d5d14ed6 | 171 | int pfn = pte_pfn(src_pte); |
867e359b CM |
172 | pfn += (((unsigned long)source & (HPAGE_SIZE-1)) |
173 | >> PAGE_SHIFT); | |
174 | src_pte = pfn_pte(pfn, src_pte); | |
175 | src_pte = pte_mksmall(src_pte); | |
176 | } | |
177 | ||
178 | /* Is the destination page writable? */ | |
179 | retry_dest: | |
180 | dst_ptep = virt_to_pte(current->mm, (unsigned long)dest); | |
181 | if (dst_ptep == NULL) { | |
182 | put_page(src_page); | |
183 | break; | |
184 | } | |
185 | dst_pte = *dst_ptep; | |
186 | if (!hv_pte_get_present(dst_pte) || | |
187 | !hv_pte_get_writable(dst_pte)) { | |
188 | put_page(src_page); | |
189 | break; | |
190 | } | |
d5d14ed6 | 191 | dst_page = pfn_to_page(pte_pfn(dst_pte)); |
867e359b CM |
192 | if (dst_page == src_page) { |
193 | /* | |
194 | * Source and dest are on the same page; this | |
195 | * potentially exposes us to incoherence if any | |
196 | * part of src and dest overlap on a cache line. | |
197 | * Just give up rather than trying to be precise. | |
198 | */ | |
199 | put_page(src_page); | |
200 | break; | |
201 | } | |
202 | get_page(dst_page); | |
203 | if (pte_val(dst_pte) != pte_val(*dst_ptep)) { | |
204 | put_page(dst_page); | |
205 | goto retry_dest; | |
206 | } | |
207 | if (pte_huge(dst_pte)) { | |
208 | /* Adjust the PTE to correspond to a small page */ | |
d5d14ed6 | 209 | int pfn = pte_pfn(dst_pte); |
867e359b CM |
210 | pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) |
211 | >> PAGE_SHIFT); | |
212 | dst_pte = pfn_pte(pfn, dst_pte); | |
213 | dst_pte = pte_mksmall(dst_pte); | |
214 | } | |
215 | ||
216 | /* All looks good: create a cachable PTE and copy from it */ | |
217 | copy_size = len; | |
218 | bytes_left_on_page = | |
219 | PAGE_SIZE - (((int)source) & (PAGE_SIZE-1)); | |
220 | if (copy_size > bytes_left_on_page) | |
221 | copy_size = bytes_left_on_page; | |
222 | bytes_left_on_page = | |
223 | PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1)); | |
224 | if (copy_size > bytes_left_on_page) | |
225 | copy_size = bytes_left_on_page; | |
226 | memcpy_multicache(dest, source, dst_pte, src_pte, copy_size); | |
227 | ||
228 | /* Release the pages */ | |
229 | put_page(dst_page); | |
230 | put_page(src_page); | |
231 | ||
232 | /* Continue on the next page */ | |
233 | dest += copy_size; | |
234 | source += copy_size; | |
235 | len -= copy_size; | |
236 | } | |
237 | ||
238 | return func(dest, source, len); | |
239 | } | |
240 | ||
241 | void *memcpy(void *to, const void *from, __kernel_size_t n) | |
242 | { | |
243 | if (n < LARGE_COPY_CUTOFF) | |
244 | return (void *)__memcpy_asm(to, from, n); | |
245 | else | |
246 | return (void *)fast_copy(to, from, n, __memcpy_asm); | |
247 | } | |
248 | ||
249 | unsigned long __copy_to_user_inatomic(void __user *to, const void *from, | |
250 | unsigned long n) | |
251 | { | |
252 | if (n < LARGE_COPY_CUTOFF) | |
253 | return __copy_to_user_inatomic_asm(to, from, n); | |
254 | else | |
255 | return fast_copy(to, from, n, __copy_to_user_inatomic_asm); | |
256 | } | |
257 | ||
258 | unsigned long __copy_from_user_inatomic(void *to, const void __user *from, | |
259 | unsigned long n) | |
260 | { | |
261 | if (n < LARGE_COPY_CUTOFF) | |
262 | return __copy_from_user_inatomic_asm(to, from, n); | |
263 | else | |
264 | return fast_copy(to, from, n, __copy_from_user_inatomic_asm); | |
265 | } | |
266 | ||
267 | unsigned long __copy_from_user_zeroing(void *to, const void __user *from, | |
268 | unsigned long n) | |
269 | { | |
270 | if (n < LARGE_COPY_CUTOFF) | |
271 | return __copy_from_user_zeroing_asm(to, from, n); | |
272 | else | |
273 | return fast_copy(to, from, n, __copy_from_user_zeroing_asm); | |
274 | } | |
275 | ||
276 | #endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */ |