Commit | Line | Data |
---|---|---|
faca2ef7 DM |
1 | /* |
2 | * zcache.c | |
3 | * | |
4 | * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp. | |
5 | * Copyright (c) 2010,2011, Nitin Gupta | |
6 | * | |
7 | * Zcache provides an in-kernel "host implementation" for transcendent memory | |
8 | * ("tmem") and, thus indirectly, for cleancache and frontswap. Zcache uses | |
9 | * lzo1x compression to improve density and an embedded allocator called | |
10 | * "zbud" which "buddies" two compressed pages semi-optimally in each physical | |
11 | * pageframe. Zbud is integrally tied into tmem to allow pageframes to | |
12 | * be "reclaimed" efficiently. | |
13 | */ | |
14 | ||
15 | #include <linux/module.h> | |
16 | #include <linux/cpu.h> | |
17 | #include <linux/highmem.h> | |
18 | #include <linux/list.h> | |
19 | #include <linux/slab.h> | |
20 | #include <linux/spinlock.h> | |
21 | #include <linux/types.h> | |
aeac64aa | 22 | #include <linux/string.h> |
faca2ef7 DM |
23 | #include <linux/atomic.h> |
24 | #include <linux/math64.h> | |
25 | #include <linux/crypto.h> | |
76426daf DM |
26 | #include <linux/swap.h> |
27 | #include <linux/swapops.h> | |
28 | #include <linux/pagemap.h> | |
29 | #include <linux/writeback.h> | |
faca2ef7 DM |
30 | |
31 | #include <linux/cleancache.h> | |
32 | #include <linux/frontswap.h> | |
33 | #include "tmem.h" | |
34 | #include "zcache.h" | |
35 | #include "zbud.h" | |
36 | #include "ramster.h" | |
95bdaee2 | 37 | #include "debug.h" |
faca2ef7 | 38 | #ifdef CONFIG_RAMSTER |
7937d74a | 39 | static bool ramster_enabled __read_mostly; |
faca2ef7 | 40 | #else |
7937d74a | 41 | #define ramster_enabled false |
faca2ef7 DM |
42 | #endif |
43 | ||
44 | #ifndef __PG_WAS_ACTIVE | |
45 | static inline bool PageWasActive(struct page *page) | |
46 | { | |
47 | return true; | |
48 | } | |
49 | ||
50 | static inline void SetPageWasActive(struct page *page) | |
51 | { | |
52 | } | |
53 | #endif | |
54 | ||
55 | #ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS | |
56 | static bool frontswap_has_exclusive_gets __read_mostly = true; | |
57 | #else | |
58 | static bool frontswap_has_exclusive_gets __read_mostly; | |
59 | static inline void frontswap_tmem_exclusive_gets(bool b) | |
60 | { | |
61 | } | |
62 | #endif | |
63 | ||
76426daf DM |
64 | /* enable (or fix code) when Seth's patches are accepted upstream */ |
65 | #define zcache_writeback_enabled 0 | |
66 | ||
7937d74a KRW |
67 | static bool zcache_enabled __read_mostly; |
68 | static bool disable_cleancache __read_mostly; | |
69 | static bool disable_frontswap __read_mostly; | |
70 | static bool disable_frontswap_ignore_nonactive __read_mostly; | |
71 | static bool disable_cleancache_ignore_nonactive __read_mostly; | |
faca2ef7 DM |
72 | static char *namestr __read_mostly = "zcache"; |
73 | ||
74 | #define ZCACHE_GFP_MASK \ | |
75 | (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) | |
76 | ||
faca2ef7 DM |
77 | /* crypto API for zcache */ |
78 | #define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME | |
79 | static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly; | |
80 | static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly; | |
81 | ||
82 | enum comp_op { | |
83 | ZCACHE_COMPOP_COMPRESS, | |
84 | ZCACHE_COMPOP_DECOMPRESS | |
85 | }; | |
86 | ||
87 | static inline int zcache_comp_op(enum comp_op op, | |
88 | const u8 *src, unsigned int slen, | |
89 | u8 *dst, unsigned int *dlen) | |
90 | { | |
91 | struct crypto_comp *tfm; | |
92 | int ret = -1; | |
93 | ||
94 | BUG_ON(!zcache_comp_pcpu_tfms); | |
95 | tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu()); | |
96 | BUG_ON(!tfm); | |
97 | switch (op) { | |
98 | case ZCACHE_COMPOP_COMPRESS: | |
99 | ret = crypto_comp_compress(tfm, src, slen, dst, dlen); | |
100 | break; | |
101 | case ZCACHE_COMPOP_DECOMPRESS: | |
102 | ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); | |
103 | break; | |
104 | default: | |
105 | ret = -EINVAL; | |
106 | } | |
107 | put_cpu(); | |
108 | return ret; | |
109 | } | |
110 | ||
111 | /* | |
112 | * policy parameters | |
113 | */ | |
114 | ||
115 | /* | |
116 | * byte count defining poor compression; pages with greater zsize will be | |
117 | * rejected | |
118 | */ | |
119 | static unsigned int zbud_max_zsize __read_mostly = (PAGE_SIZE / 8) * 7; | |
120 | /* | |
121 | * byte count defining poor *mean* compression; pages with greater zsize | |
122 | * will be rejected until sufficient better-compressed pages are accepted | |
123 | * driving the mean below this threshold | |
124 | */ | |
125 | static unsigned int zbud_max_mean_zsize __read_mostly = (PAGE_SIZE / 8) * 5; | |
126 | ||
127 | /* | |
128 | * for now, used named slabs so can easily track usage; later can | |
129 | * either just use kmalloc, or perhaps add a slab-like allocator | |
130 | * to more carefully manage total memory utilization | |
131 | */ | |
132 | static struct kmem_cache *zcache_objnode_cache; | |
133 | static struct kmem_cache *zcache_obj_cache; | |
134 | ||
135 | static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, }; | |
136 | ||
95bdaee2 KRW |
137 | /* Used by debug.c */ |
138 | ssize_t zcache_pers_zpages; | |
139 | u64 zcache_pers_zbytes; | |
140 | ssize_t zcache_eph_pageframes; | |
141 | ssize_t zcache_pers_pageframes; | |
e0d11aed | 142 | |
95bdaee2 | 143 | /* Used by this code. */ |
86d7de66 KRW |
144 | ssize_t zcache_last_active_file_pageframes; |
145 | ssize_t zcache_last_inactive_file_pageframes; | |
146 | ssize_t zcache_last_active_anon_pageframes; | |
147 | ssize_t zcache_last_inactive_anon_pageframes; | |
95bdaee2 | 148 | #ifdef CONFIG_ZCACHE_WRITEBACK |
86d7de66 KRW |
149 | ssize_t zcache_writtenback_pages; |
150 | ssize_t zcache_outstanding_writeback_pages; | |
faca2ef7 | 151 | #endif |
faca2ef7 DM |
152 | /* |
153 | * zcache core code starts here | |
154 | */ | |
155 | ||
156 | static struct zcache_client zcache_host; | |
157 | static struct zcache_client zcache_clients[MAX_CLIENTS]; | |
158 | ||
159 | static inline bool is_local_client(struct zcache_client *cli) | |
160 | { | |
161 | return cli == &zcache_host; | |
162 | } | |
163 | ||
164 | static struct zcache_client *zcache_get_client_by_id(uint16_t cli_id) | |
165 | { | |
166 | struct zcache_client *cli = &zcache_host; | |
167 | ||
168 | if (cli_id != LOCAL_CLIENT) { | |
169 | if (cli_id >= MAX_CLIENTS) | |
170 | goto out; | |
171 | cli = &zcache_clients[cli_id]; | |
172 | } | |
173 | out: | |
174 | return cli; | |
175 | } | |
176 | ||
177 | /* | |
178 | * Tmem operations assume the poolid implies the invoking client. | |
179 | * Zcache only has one client (the kernel itself): LOCAL_CLIENT. | |
180 | * RAMster has each client numbered by cluster node, and a KVM version | |
181 | * of zcache would have one client per guest and each client might | |
182 | * have a poolid==N. | |
183 | */ | |
/*
 * Look up pool @poolid belonging to client @cli_id, taking a reference
 * on the pool (and, for non-local clients, on the client too) that the
 * caller must drop via zcache_put_pool().  Returns NULL when the poolid
 * is out of range or the pool slot is empty.
 */
struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
{
	struct tmem_pool *pool = NULL;
	struct zcache_client *cli = NULL;

	cli = zcache_get_client_by_id(cli_id);
	if (cli == NULL)
		goto out;
	/* the local client is static, so only remote clients are refcounted */
	if (!is_local_client(cli))
		atomic_inc(&cli->refcount);
	if (poolid < MAX_POOLS_PER_CLIENT) {
		pool = cli->tmem_pools[poolid];
		if (pool != NULL)
			atomic_inc(&pool->refcount);
	}
	/* NOTE(review): if the pool lookup fails after the client refcount
	 * was taken above, nothing drops that refcount (callers only call
	 * zcache_put_pool() on a non-NULL pool) — verify for ramster. */
out:
	return pool;
}
202 | ||
203 | void zcache_put_pool(struct tmem_pool *pool) | |
204 | { | |
205 | struct zcache_client *cli = NULL; | |
206 | ||
207 | if (pool == NULL) | |
208 | BUG(); | |
209 | cli = pool->client; | |
210 | atomic_dec(&pool->refcount); | |
211 | if (!is_local_client(cli)) | |
212 | atomic_dec(&cli->refcount); | |
213 | } | |
214 | ||
215 | int zcache_new_client(uint16_t cli_id) | |
216 | { | |
217 | struct zcache_client *cli; | |
218 | int ret = -1; | |
219 | ||
220 | cli = zcache_get_client_by_id(cli_id); | |
221 | if (cli == NULL) | |
222 | goto out; | |
223 | if (cli->allocated) | |
224 | goto out; | |
225 | cli->allocated = 1; | |
226 | ret = 0; | |
227 | out: | |
228 | return ret; | |
229 | } | |
230 | ||
231 | /* | |
232 | * zcache implementation for tmem host ops | |
233 | */ | |
234 | ||
/*
 * tmem hostop: hand out one objnode from this cpu's preload stash
 * (filled by zcache_pampd_create() before tmem runs with irqs off).
 * Allocation must not fail here, hence the BUG_ON.
 */
static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
{
	struct tmem_objnode *objnode = NULL;
	struct zcache_preload *kp;
	int i;

	kp = &__get_cpu_var(zcache_preloads);
	for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
		objnode = kp->objnodes[i];
		if (objnode != NULL) {
			/* claim the slot so it is not handed out twice */
			kp->objnodes[i] = NULL;
			break;
		}
	}
	BUG_ON(objnode == NULL);	/* preload guarantees a free slot */
	inc_zcache_objnode_count();
	return objnode;
}
253 | ||
/* tmem hostop: return an objnode to the slab; @pool is unused but the
 * hostops signature requires it. */
static void zcache_objnode_free(struct tmem_objnode *objnode,
				struct tmem_pool *pool)
{
	dec_zcache_objnode_count();
	kmem_cache_free(zcache_objnode_cache, objnode);
}
260 | ||
/*
 * tmem hostop: hand out the single preloaded tmem_obj for this cpu
 * (set up by zcache_pampd_create()); BUGs if the preload is missing.
 */
static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
{
	struct tmem_obj *obj = NULL;
	struct zcache_preload *kp;

	kp = &__get_cpu_var(zcache_preloads);
	obj = kp->obj;
	BUG_ON(obj == NULL);	/* preload must have succeeded */
	kp->obj = NULL;		/* consume the stash */
	inc_zcache_obj_count();
	return obj;
}
273 | ||
/* tmem hostop: return a tmem_obj to the slab; @pool unused. */
static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
{
	dec_zcache_obj_count();
	kmem_cache_free(zcache_obj_cache, obj);
}
279 | ||
/* Metadata allocation callbacks registered with the tmem core. */
static struct tmem_hostops zcache_hostops = {
	.obj_alloc = zcache_obj_alloc,
	.obj_free = zcache_obj_free,
	.objnode_alloc = zcache_objnode_alloc,
	.objnode_free = zcache_objnode_free,
};
286 | ||
287 | static struct page *zcache_alloc_page(void) | |
288 | { | |
289 | struct page *page = alloc_page(ZCACHE_GFP_MASK); | |
290 | ||
291 | if (page != NULL) | |
3f007ca4 | 292 | inc_zcache_pageframes_alloced(); |
faca2ef7 DM |
293 | return page; |
294 | } | |
295 | ||
faca2ef7 DM |
296 | static void zcache_free_page(struct page *page) |
297 | { | |
298 | long curr_pageframes; | |
7892e560 | 299 | static long max_pageframes, min_pageframes; |
faca2ef7 DM |
300 | |
301 | if (page == NULL) | |
302 | BUG(); | |
303 | __free_page(page); | |
3f007ca4 | 304 | inc_zcache_pageframes_freed(); |
e0d11aed | 305 | curr_pageframes = curr_pageframes_count(); |
faca2ef7 DM |
306 | if (curr_pageframes > max_pageframes) |
307 | max_pageframes = curr_pageframes; | |
308 | if (curr_pageframes < min_pageframes) | |
309 | min_pageframes = curr_pageframes; | |
67e2cba4 | 310 | #ifdef CONFIG_ZCACHE_DEBUG |
faca2ef7 DM |
311 | if (curr_pageframes > 2L || curr_pageframes < -2L) { |
312 | /* pr_info here */ | |
313 | } | |
314 | #endif | |
315 | } | |
316 | ||
317 | /* | |
318 | * zcache implementations for PAM page descriptor ops | |
319 | */ | |
320 | ||
321 | /* forward reference */ | |
322 | static void zcache_compress(struct page *from, | |
323 | void **out_va, unsigned *out_len); | |
324 | ||
325 | static struct page *zcache_evict_eph_pageframe(void); | |
326 | ||
/*
 * Allocate zbud space for one ephemeral (cleancache) zpage.  @data is
 * a struct page unless @raw, in which case it is already-compressed
 * bytes of length @size.  Tries, in order: pairing with an existing
 * half-full zbud page, a fresh pageframe, then evicting an ephemeral
 * LRU pageframe.  Returns the zbudref as an opaque pampd, or NULL.
 */
static void *zcache_pampd_eph_create(char *data, size_t size, bool raw,
					struct tmem_handle *th)
{
	void *pampd = NULL, *cdata = data;
	unsigned clen = size;
	struct page *page = (struct page *)(data), *newpage;

	if (!raw) {
		zcache_compress(page, &cdata, &clen);
		/* too poorly compressed to be worth caching */
		if (clen > zbud_max_buddy_size()) {
			inc_zcache_compress_poor();
			goto out;
		}
	} else {
		BUG_ON(clen > zbud_max_buddy_size());
	}

	/* look for space via an existing match first */
	pampd = (void *)zbud_match_prep(th, true, cdata, clen);
	if (pampd != NULL)
		goto got_pampd;

	/* no match, now we need to find (or free up) a full page */
	newpage = zcache_alloc_page();
	if (newpage != NULL)
		goto create_in_new_page;

	inc_zcache_failed_getfreepages();
	/* can't allocate a page, evict an ephemeral page via LRU */
	newpage = zcache_evict_eph_pageframe();
	if (newpage == NULL) {
		inc_zcache_eph_ate_tail_failed();
		goto out;
	}
	inc_zcache_eph_ate_tail();

create_in_new_page:
	pampd = (void *)zbud_create_prep(th, true, cdata, clen, newpage);
	BUG_ON(pampd == NULL);	/* a fresh page must accommodate the zpage */
	inc_zcache_eph_pageframes();

got_pampd:
	inc_zcache_eph_zbytes(clen);
	inc_zcache_eph_zpages();
	/* raw data implies this page arrived from a remote ramster node */
	if (ramster_enabled && raw)
		ramster_count_foreign_pages(true, 1);
out:
	return pampd;
}
376 | ||
/*
 * Allocate zbud space for one persistent (frontswap) zpage.  Like the
 * ephemeral path but applies two admission policies: reject a single
 * zpage compressing worse than zbud_max_zsize, and reject when the
 * running mean zsize exceeds zbud_max_mean_zsize.  Returns the zbudref
 * as an opaque pampd, or NULL on rejection/failure.
 */
static void *zcache_pampd_pers_create(char *data, size_t size, bool raw,
					struct tmem_handle *th)
{
	void *pampd = NULL, *cdata = data;
	unsigned clen = size;
	struct page *page = (struct page *)(data), *newpage;
	unsigned long zbud_mean_zsize;
	unsigned long curr_pers_zpages, total_zsize;

	/* NULL data only occurs on the ramster remotify path */
	if (data == NULL) {
		BUG_ON(!ramster_enabled);
		goto create_pampd;
	}
	curr_pers_zpages = zcache_pers_zpages;
	/* FIXME CONFIG_RAMSTER... subtract atomic remote_pers_pages here? */
	if (!raw)
		zcache_compress(page, &cdata, &clen);
	/* reject if compression is too poor */
	if (clen > zbud_max_zsize) {
		inc_zcache_compress_poor();
		goto out;
	}
	/* reject if mean compression is too poor */
	if ((clen > zbud_max_mean_zsize) && (curr_pers_zpages > 0)) {
		total_zsize = zcache_pers_zbytes;
		/* counter can transiently go negative; clamp for the mean */
		if ((long)total_zsize < 0)
			total_zsize = 0;
		zbud_mean_zsize = div_u64(total_zsize,
					curr_pers_zpages);
		if (zbud_mean_zsize > zbud_max_mean_zsize) {
			inc_zcache_mean_compress_poor();
			goto out;
		}
	}

create_pampd:
	/* look for space via an existing match first */
	pampd = (void *)zbud_match_prep(th, false, cdata, clen);
	if (pampd != NULL)
		goto got_pampd;

	/* no match, now we need to find (or free up) a full page */
	newpage = zcache_alloc_page();
	if (newpage != NULL)
		goto create_in_new_page;
	/*
	 * FIXME do the following only if eph is oversized?
	 * if (zcache_eph_pageframes >
	 * (global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE) +
	 * global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE)))
	 */
	inc_zcache_failed_getfreepages();
	/* can't allocate a page, evict an ephemeral page via LRU */
	newpage = zcache_evict_eph_pageframe();
	if (newpage == NULL) {
		inc_zcache_pers_ate_eph_failed();
		goto out;
	}
	inc_zcache_pers_ate_eph();

create_in_new_page:
	pampd = (void *)zbud_create_prep(th, false, cdata, clen, newpage);
	BUG_ON(pampd == NULL);	/* a fresh page must accommodate the zpage */
	inc_zcache_pers_pageframes();

got_pampd:
	inc_zcache_pers_zpages();
	inc_zcache_pers_zbytes(clen);
	/* raw data implies this page arrived from a remote ramster node */
	if (ramster_enabled && raw)
		ramster_count_foreign_pages(false, 1);
out:
	return pampd;
}
450 | ||
451 | /* | |
452 | * This is called directly from zcache_put_page to pre-allocate space | |
453 | * to store a zpage. | |
454 | */ | |
/*
 * This is called directly from zcache_put_page to pre-allocate space
 * to store a zpage.  Runs with irqs disabled: first tops up this cpu's
 * preload stash of objnodes and one obj (consumed later by the hostops
 * above, which must not fail), then dispatches to the ephemeral or
 * persistent creator.  Returns the pampd or NULL on allocation failure.
 */
void *zcache_pampd_create(char *data, unsigned int size, bool raw,
				int eph, struct tmem_handle *th)
{
	void *pampd = NULL;
	struct zcache_preload *kp;
	struct tmem_objnode *objnode;
	struct tmem_obj *obj;
	int i;

	BUG_ON(!irqs_disabled());
	/* pre-allocate per-cpu metadata */
	BUG_ON(zcache_objnode_cache == NULL);
	BUG_ON(zcache_obj_cache == NULL);
	kp = &__get_cpu_var(zcache_preloads);
	for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
		objnode = kp->objnodes[i];
		if (objnode == NULL) {
			objnode = kmem_cache_alloc(zcache_objnode_cache,
							ZCACHE_GFP_MASK);
			if (unlikely(objnode == NULL)) {
				inc_zcache_failed_alloc();
				goto out;
			}
			kp->objnodes[i] = objnode;
		}
	}
	if (kp->obj == NULL) {
		obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
		kp->obj = obj;
	}
	if (unlikely(kp->obj == NULL)) {
		inc_zcache_failed_alloc();
		goto out;
	}
	/*
	 * ok, have all the metadata pre-allocated, now do the data
	 * but since how we allocate the data is dependent on ephemeral
	 * or persistent, we split the call here to different sub-functions
	 */
	if (eph)
		pampd = zcache_pampd_eph_create(data, size, raw, th);
	else
		pampd = zcache_pampd_pers_create(data, size, raw, th);
out:
	return pampd;
}
501 | ||
502 | /* | |
503 | * This is a pamops called via tmem_put and is necessary to "finish" | |
504 | * a pampd creation. | |
505 | */ | |
/* Commit the zbud allocation prepared by zcache_pampd_create(). */
void zcache_pampd_create_finish(void *pampd, bool eph)
{
	zbud_create_finish((struct zbudref *)pampd, eph);
}
510 | ||
511 | /* | |
512 | * This is passed as a function parameter to zbud_decompress so that | |
513 | * zbud need not be familiar with the details of crypto. It assumes that | |
514 | * the bytes from_va and to_va through from_va+size-1 and to_va+size-1 are | |
515 | * kmapped. It must be successful, else there is a logic bug somewhere. | |
516 | */ | |
static void zcache_decompress(char *from_va, unsigned int size, char *to_va)
{
	int ret;
	unsigned int outlen = PAGE_SIZE;

	ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size,
				to_va, &outlen);
	BUG_ON(ret);	/* decompressing our own stored data must not fail */
	BUG_ON(outlen != PAGE_SIZE);	/* every zpage inflates to one page */
}
527 | ||
528 | /* | |
529 | * Decompress from the kernel va to a pageframe | |
530 | */ | |
/*
 * Decompress @size bytes at kernel va @from_va into the pageframe
 * backing @to_page (mapped atomically for the duration).
 */
void zcache_decompress_to_page(char *from_va, unsigned int size,
				struct page *to_page)
{
	char *to_va;

	to_va = kmap_atomic(to_page);
	zcache_decompress(from_va, size, to_va);
	kunmap_atomic(to_va);
}
538 | ||
539 | /* | |
540 | * fill the pageframe corresponding to the struct page with the data | |
541 | * from the passed pampd | |
542 | */ | |
/*
 * Pamops get_data: copy the zpage referenced by @pampd into @data,
 * leaving the zpage in place.  @raw copies compressed bytes (size via
 * *sizep); otherwise decompresses into the struct page that @data
 * actually is.  Only supported for persistent pools today.
 */
static int zcache_pampd_get_data(char *data, size_t *sizep, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret;
	bool eph = !is_persistent(pool);

	BUG_ON(preemptible());
	BUG_ON(eph);	/* fix later if shared pools get implemented */
	BUG_ON(pampd_is_remote(pampd));
	if (raw)
		ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
						sizep, eph);
	else {
		ret = zbud_decompress((struct page *)(data),
					(struct zbudref *)pampd, false,
					zcache_decompress);
		*sizep = PAGE_SIZE;
	}
	return ret;
}
564 | ||
565 | /* | |
566 | * fill the pageframe corresponding to the struct page with the data | |
567 | * from the passed pampd | |
568 | */ | |
/*
 * Pamops get_data_and_free: like get_data but exclusive — after the
 * copy/decompress the zpage is removed from zbud, accounting is
 * decremented, and a fully emptied pageframe is freed.
 */
static int zcache_pampd_get_data_and_free(char *data, size_t *sizep, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret;
	bool eph = !is_persistent(pool);
	struct page *page = NULL;
	unsigned int zsize, zpages;

	BUG_ON(preemptible());
	BUG_ON(pampd_is_remote(pampd));
	if (raw)
		ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
						sizep, eph);
	else {
		ret = zbud_decompress((struct page *)(data),
					(struct zbudref *)pampd, eph,
					zcache_decompress);
		*sizep = PAGE_SIZE;
	}
	/* page is non-NULL only when this was the pageframe's last zpage */
	page = zbud_free_and_delist((struct zbudref *)pampd, eph,
					&zsize, &zpages);
	if (eph) {
		if (page)
			dec_zcache_eph_pageframes();
		dec_zcache_eph_zpages(zpages);
		dec_zcache_eph_zbytes(zsize);
	} else {
		if (page)
			dec_zcache_pers_pageframes();
		dec_zcache_pers_zpages(zpages);
		dec_zcache_pers_zbytes(zsize);
	}
	if (!is_local_client(pool->client))
		ramster_count_foreign_pages(eph, -1);
	if (page)
		zcache_free_page(page);
	return ret;
}
608 | ||
609 | /* | |
610 | * free the pampd and remove it from any zcache lists | |
611 | * pampd must no longer be pointed to from any tmem data structures! | |
612 | */ | |
/*
 * free the pampd and remove it from any zcache lists
 * pampd must no longer be pointed to from any tmem data structures!
 * For a remote (ramster) pampd, ramster gets first crack and may
 * consume it entirely; otherwise the zpage is released from zbud,
 * accounting is decremented and an emptied pageframe is freed.
 */
static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
				struct tmem_oid *oid, uint32_t index, bool acct)
{
	struct page *page = NULL;
	unsigned int zsize, zpages;

	BUG_ON(preemptible());
	if (pampd_is_remote(pampd)) {
		BUG_ON(!ramster_enabled);
		pampd = ramster_pampd_free(pampd, pool, oid, index, acct);
		if (pampd == NULL)
			return;		/* ramster fully handled it */
	}
	if (is_ephemeral(pool)) {
		page = zbud_free_and_delist((struct zbudref *)pampd,
						true, &zsize, &zpages);
		if (page)
			dec_zcache_eph_pageframes();
		dec_zcache_eph_zpages(zpages);
		dec_zcache_eph_zbytes(zsize);
		/* FIXME CONFIG_RAMSTER... check acct parameter? */
	} else {
		page = zbud_free_and_delist((struct zbudref *)pampd,
						false, &zsize, &zpages);
		if (page)
			dec_zcache_pers_pageframes();
		dec_zcache_pers_zpages(zpages);
		dec_zcache_pers_zbytes(zsize);
	}
	if (!is_local_client(pool->client))
		ramster_count_foreign_pages(is_ephemeral(pool), -1);
	if (page)
		zcache_free_page(page);
}
647 | ||
/* Page-descriptor (pampd) callbacks for the tmem core.  The .create
 * callback is presumably wired up elsewhere — zcache_pampd_create is
 * non-static and called directly; verify against tmem registration. */
static struct tmem_pamops zcache_pamops = {
	.create_finish = zcache_pampd_create_finish,
	.get_data = zcache_pampd_get_data,
	.get_data_and_free = zcache_pampd_get_data_and_free,
	.free = zcache_pampd_free,
};
654 | ||
655 | /* | |
656 | * zcache compression/decompression and related per-cpu stuff | |
657 | */ | |
658 | ||
659 | static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); | |
660 | #define ZCACHE_DSTMEM_ORDER 1 | |
661 | ||
/*
 * Compress the pageframe @from into this cpu's zcache_dstmem buffer,
 * returning the buffer va in *out_va and the compressed length in
 * *out_len.  Caller must have irqs disabled so the per-cpu buffer is
 * not reused under us.
 */
static void zcache_compress(struct page *from, void **out_va, unsigned *out_len)
{
	int ret;
	unsigned char *dmem = __get_cpu_var(zcache_dstmem);
	char *from_va;

	BUG_ON(!irqs_disabled());
	/* no buffer or no compressor so can't compress */
	BUG_ON(dmem == NULL);
	*out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER;
	from_va = kmap_atomic(from);
	mb();	/* NOTE(review): purpose undocumented — presumably ensures
		 * the page contents are visible before compression; confirm */
	ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem,
				out_len);
	BUG_ON(ret);	/* compression into a 2-page buffer must not fail */
	*out_va = dmem;
	kunmap_atomic(from_va);
}
680 | ||
681 | static int zcache_comp_cpu_up(int cpu) | |
682 | { | |
683 | struct crypto_comp *tfm; | |
684 | ||
685 | tfm = crypto_alloc_comp(zcache_comp_name, 0, 0); | |
686 | if (IS_ERR(tfm)) | |
687 | return NOTIFY_BAD; | |
688 | *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm; | |
689 | return NOTIFY_OK; | |
690 | } | |
691 | ||
692 | static void zcache_comp_cpu_down(int cpu) | |
693 | { | |
694 | struct crypto_comp *tfm; | |
695 | ||
696 | tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu); | |
697 | crypto_free_comp(tfm); | |
698 | *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL; | |
699 | } | |
700 | ||
701 | static int zcache_cpu_notifier(struct notifier_block *nb, | |
702 | unsigned long action, void *pcpu) | |
703 | { | |
704 | int ret, i, cpu = (long)pcpu; | |
705 | struct zcache_preload *kp; | |
706 | ||
707 | switch (action) { | |
708 | case CPU_UP_PREPARE: | |
709 | ret = zcache_comp_cpu_up(cpu); | |
710 | if (ret != NOTIFY_OK) { | |
711 | pr_err("%s: can't allocate compressor xform\n", | |
712 | namestr); | |
713 | return ret; | |
714 | } | |
715 | per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( | |
716 | GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER); | |
717 | if (ramster_enabled) | |
718 | ramster_cpu_up(cpu); | |
719 | break; | |
720 | case CPU_DEAD: | |
721 | case CPU_UP_CANCELED: | |
722 | zcache_comp_cpu_down(cpu); | |
723 | free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), | |
724 | ZCACHE_DSTMEM_ORDER); | |
725 | per_cpu(zcache_dstmem, cpu) = NULL; | |
726 | kp = &per_cpu(zcache_preloads, cpu); | |
727 | for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) { | |
728 | if (kp->objnodes[i]) | |
729 | kmem_cache_free(zcache_objnode_cache, | |
730 | kp->objnodes[i]); | |
731 | } | |
732 | if (kp->obj) { | |
733 | kmem_cache_free(zcache_obj_cache, kp->obj); | |
734 | kp->obj = NULL; | |
735 | } | |
736 | if (ramster_enabled) | |
737 | ramster_cpu_down(cpu); | |
738 | break; | |
739 | default: | |
740 | break; | |
741 | } | |
742 | return NOTIFY_OK; | |
743 | } | |
744 | ||
/* Hotplug registration for the per-cpu setup/teardown above. */
static struct notifier_block zcache_cpu_notifier_block = {
	.notifier_call = zcache_cpu_notifier
};
748 | ||
749 | /* | |
750 | * The following code interacts with the zbud eviction and zbud | |
751 | * zombify code to access LRU pages | |
752 | */ | |
753 | ||
/*
 * Ask zbud for the LRU ephemeral pageframe, discarding the 1-2 zpages
 * it held and updating accounting.  Returns the now-empty pageframe
 * (ready for reuse) or NULL if nothing was evictable.
 */
static struct page *zcache_evict_eph_pageframe(void)
{
	struct page *page;
	unsigned int zsize = 0, zpages = 0;

	page = zbud_evict_pageframe_lru(&zsize, &zpages);
	if (page == NULL)
		goto out;
	dec_zcache_eph_zbytes(zsize);
	dec_zcache_eph_zpages(zpages);
	inc_zcache_evicted_eph_zpages(zpages);
	dec_zcache_eph_pageframes();
	inc_zcache_evicted_eph_pageframes();
out:
	return page;
}
770 | ||
76426daf DM |
771 | #ifdef CONFIG_ZCACHE_WRITEBACK |
772 | ||
773 | static atomic_t zcache_outstanding_writeback_pages_atomic = ATOMIC_INIT(0); | |
774 | ||
3f007ca4 KRW |
775 | static inline void inc_zcache_outstanding_writeback_pages(void) |
776 | { | |
777 | zcache_outstanding_writeback_pages = | |
778 | atomic_inc_return(&zcache_outstanding_writeback_pages_atomic); | |
779 | } | |
6f4336fb KRW |
780 | static inline void dec_zcache_outstanding_writeback_pages(void) |
781 | { | |
782 | zcache_outstanding_writeback_pages = | |
783 | atomic_dec_return(&zcache_outstanding_writeback_pages_atomic); | |
784 | }; | |
faca2ef7 DM |
785 | static void unswiz(struct tmem_oid oid, u32 index, |
786 | unsigned *type, pgoff_t *offset); | |
7892e560 | 787 | |
faca2ef7 | 788 | /* |
76426daf DM |
789 | * Choose an LRU persistent pageframe and attempt to write it back to |
790 | * the backing swap disk by calling frontswap_writeback on both zpages. | |
faca2ef7 DM |
791 | * |
792 | * This is work-in-progress. | |
793 | */ | |
794 | ||
76426daf DM |
/* bio completion for writeback: finish the swap write, then account
 * one fewer outstanding and one more written-back page. */
static void zcache_end_swap_write(struct bio *bio, int err)
{
	end_swap_bio_write(bio, err);
	dec_zcache_outstanding_writeback_pages();
	zcache_writtenback_pages++;
}
801 | ||
802 | /* | |
803 | * zcache_get_swap_cache_page | |
804 | * | |
805 | * This is an adaption of read_swap_cache_async() | |
806 | * | |
807 | * If success, page is returned in retpage | |
808 | * Returns 0 if page was already in the swap cache, page is not locked | |
809 | * Returns 1 if the new page needs to be populated, page is locked | |
810 | */ | |
static int zcache_get_swap_cache_page(int type, pgoff_t offset,
				struct page *new_page)
{
	struct page *found_page;
	swp_entry_t entry = swp_entry(type, offset);
	int err;

	BUG_ON(new_page == NULL);
	do {
		/*
		 * First check the swap cache. Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			return 0;	/* someone else already cached it */

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;	/* retry the whole sequence */
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			return 1;	/* caller must populate + unlock */
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
		/* FIXME: is it possible to get here without err==-ENOMEM?
		 * If not, we can dispense with the do loop, use goto retry */
	} while (err != -ENOMEM);

	return -ENOMEM;
}
872 | ||
873 | /* | |
874 | * Given a frontswap zpage in zcache (identified by type/offset) and | |
875 | * an empty page, put the page into the swap cache, use frontswap | |
876 | * to get the page from zcache into the empty page, then give it | |
877 | * to the swap subsystem to send to disk (carefully avoiding the | |
878 | * possibility that frontswap might snatch it back). | |
879 | * Returns < 0 if error, 0 if successful, and 1 if successful but | |
880 | * the newpage passed in not needed and should be freed. | |
881 | */ | |
static int zcache_frontswap_writeback_zpage(int type, pgoff_t offset,
					struct page *newpage)
{
	struct page *page = newpage;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,	/* async, best-effort writeback */
	};

	ret = zcache_get_swap_cache_page(type, offset, page);
	if (ret < 0)
		return ret;
	else if (ret == 0) {
		/* more uptodate page is already in swapcache */
		__frontswap_invalidate_page(type, offset);
		return 1;	/* caller should free the unused newpage */
	}

	BUG_ON(!frontswap_has_exclusive_gets); /* load must also invalidate */
	/* FIXME: how is it possible to get here when page is unlocked? */
	__frontswap_load(page);
	SetPageUptodate(page);  /* above does SetPageDirty, is that enough? */

	/* start writeback */
	SetPageReclaim(page);
	/*
	 * Return value is ignored here because it doesn't change anything
	 * for us.  Page is returned unlocked.
	 */
	(void)__swap_writepage(page, &wbc, zcache_end_swap_write);
	/* drop our ref; the swap cache and bio hold their own */
	page_cache_release(page);
	inc_zcache_outstanding_writeback_pages();

	return 0;
}
917 | ||
918 | /* | |
919 | * The following is still a magic number... we want to allow forward progress | |
920 | * for writeback because it clears out needed RAM when under pressure, but | |
921 | * we don't want to allow writeback to absorb and queue too many GFP_KERNEL | |
922 | * pages if the swap device is very slow. | |
923 | */ | |
924 | #define ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES 6400 | |
925 | ||
926 | /* | |
927 | * Try to allocate two free pages, first using a non-aggressive alloc, | |
928 | * then by evicting zcache ephemeral (clean pagecache) pages, and last | |
929 | * by aggressive GFP_KERNEL alloc. We allow zbud to choose a pageframe | |
930 | * consisting of 1-2 zbuds/zpages, then call the writeback_zpage helper | |
931 | * function above for each. | |
932 | */ | |
933 | static int zcache_frontswap_writeback(void) | |
faca2ef7 DM |
934 | { |
935 | struct tmem_handle th[2]; | |
76426daf DM |
936 | int ret = 0; |
937 | int nzbuds, writeback_ret; | |
faca2ef7 | 938 | unsigned type; |
76426daf | 939 | struct page *znewpage1 = NULL, *znewpage2 = NULL; |
faca2ef7 | 940 | struct page *evictpage1 = NULL, *evictpage2 = NULL; |
76426daf DM |
941 | struct page *newpage1 = NULL, *newpage2 = NULL; |
942 | struct page *page1 = NULL, *page2 = NULL; | |
faca2ef7 DM |
943 | pgoff_t offset; |
944 | ||
76426daf DM |
945 | znewpage1 = alloc_page(ZCACHE_GFP_MASK); |
946 | znewpage2 = alloc_page(ZCACHE_GFP_MASK); | |
947 | if (znewpage1 == NULL) | |
faca2ef7 | 948 | evictpage1 = zcache_evict_eph_pageframe(); |
76426daf | 949 | if (znewpage2 == NULL) |
faca2ef7 | 950 | evictpage2 = zcache_evict_eph_pageframe(); |
76426daf DM |
951 | |
952 | if ((evictpage1 == NULL || evictpage2 == NULL) && | |
953 | atomic_read(&zcache_outstanding_writeback_pages_atomic) > | |
954 | ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES) { | |
faca2ef7 | 955 | goto free_and_out; |
76426daf DM |
956 | } |
957 | if (znewpage1 == NULL && evictpage1 == NULL) | |
958 | newpage1 = alloc_page(GFP_KERNEL); | |
959 | if (znewpage2 == NULL && evictpage2 == NULL) | |
960 | newpage2 = alloc_page(GFP_KERNEL); | |
961 | if (newpage1 == NULL || newpage2 == NULL) | |
962 | goto free_and_out; | |
963 | ||
964 | /* ok, we have two pageframes pre-allocated, get a pair of zbuds */ | |
faca2ef7 DM |
965 | nzbuds = zbud_make_zombie_lru(&th[0], NULL, NULL, false); |
966 | if (nzbuds == 0) { | |
967 | ret = -ENOENT; | |
968 | goto free_and_out; | |
969 | } | |
76426daf DM |
970 | |
971 | /* process the first zbud */ | |
faca2ef7 | 972 | unswiz(th[0].oid, th[0].index, &type, &offset); |
76426daf DM |
973 | page1 = (znewpage1 != NULL) ? znewpage1 : |
974 | ((newpage1 != NULL) ? newpage1 : evictpage1); | |
975 | writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page1); | |
976 | if (writeback_ret < 0) { | |
977 | ret = -ENOMEM; | |
faca2ef7 | 978 | goto free_and_out; |
faca2ef7 | 979 | } |
76426daf DM |
980 | if (evictpage1 != NULL) |
981 | zcache_pageframes_freed = | |
982 | atomic_inc_return(&zcache_pageframes_freed_atomic); | |
983 | if (writeback_ret == 0) { | |
984 | /* zcache_get_swap_cache_page will free, don't double free */ | |
985 | znewpage1 = NULL; | |
986 | newpage1 = NULL; | |
987 | evictpage1 = NULL; | |
988 | } | |
989 | if (nzbuds < 2) | |
990 | goto free_and_out; | |
991 | ||
992 | /* if there is a second zbud, process it */ | |
993 | unswiz(th[1].oid, th[1].index, &type, &offset); | |
994 | page2 = (znewpage2 != NULL) ? znewpage2 : | |
995 | ((newpage2 != NULL) ? newpage2 : evictpage2); | |
996 | writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page2); | |
997 | if (writeback_ret < 0) { | |
998 | ret = -ENOMEM; | |
999 | goto free_and_out; | |
1000 | } | |
1001 | if (evictpage2 != NULL) | |
1002 | zcache_pageframes_freed = | |
1003 | atomic_inc_return(&zcache_pageframes_freed_atomic); | |
1004 | if (writeback_ret == 0) { | |
1005 | znewpage2 = NULL; | |
1006 | newpage2 = NULL; | |
1007 | evictpage2 = NULL; | |
1008 | } | |
faca2ef7 DM |
1009 | |
1010 | free_and_out: | |
76426daf DM |
1011 | if (znewpage1 != NULL) |
1012 | page_cache_release(znewpage1); | |
1013 | if (znewpage2 != NULL) | |
1014 | page_cache_release(znewpage2); | |
faca2ef7 | 1015 | if (newpage1 != NULL) |
76426daf | 1016 | page_cache_release(newpage1); |
faca2ef7 | 1017 | if (newpage2 != NULL) |
76426daf | 1018 | page_cache_release(newpage2); |
faca2ef7 DM |
1019 | if (evictpage1 != NULL) |
1020 | zcache_free_page(evictpage1); | |
1021 | if (evictpage2 != NULL) | |
1022 | zcache_free_page(evictpage2); | |
faca2ef7 DM |
1023 | return ret; |
1024 | } | |
76426daf | 1025 | #endif /* CONFIG_ZCACHE_WRITEBACK */ |
faca2ef7 DM |
1026 | |
1027 | /* | |
1028 | * When zcache is disabled ("frozen"), pools can be created and destroyed, | |
1029 | * but all puts (and thus all other operations that require memory allocation) | |
1030 | * must fail. If zcache is unfrozen, accepts puts, then frozen again, | |
1031 | * data consistency requires all puts while frozen to be converted into | |
1032 | * flushes. | |
1033 | */ | |
1034 | static bool zcache_freeze; | |
1035 | ||
1036 | /* | |
1037 | * This zcache shrinker interface reduces the number of ephemeral pageframes | |
1038 | * used by zcache to approximately the same as the total number of LRU_FILE | |
76426daf DM |
1039 | * pageframes in use, and now also reduces the number of persistent pageframes |
1040 | * used by zcache to approximately the same as the total number of LRU_ANON | |
1041 | * pageframes in use. FIXME POLICY: Probably the writeback should only occur | |
1042 | * if the eviction doesn't free enough pages. | |
faca2ef7 DM |
1043 | */ |
1044 | static int shrink_zcache_memory(struct shrinker *shrink, | |
1045 | struct shrink_control *sc) | |
1046 | { | |
1047 | static bool in_progress; | |
1048 | int ret = -1; | |
1049 | int nr = sc->nr_to_scan; | |
1050 | int nr_evict = 0; | |
76426daf | 1051 | int nr_writeback = 0; |
faca2ef7 | 1052 | struct page *page; |
76426daf | 1053 | int file_pageframes_inuse, anon_pageframes_inuse; |
faca2ef7 DM |
1054 | |
1055 | if (nr <= 0) | |
1056 | goto skip_evict; | |
1057 | ||
1058 | /* don't allow more than one eviction thread at a time */ | |
1059 | if (in_progress) | |
1060 | goto skip_evict; | |
1061 | ||
1062 | in_progress = true; | |
1063 | ||
1064 | /* we are going to ignore nr, and target a different value */ | |
1065 | zcache_last_active_file_pageframes = | |
1066 | global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); | |
1067 | zcache_last_inactive_file_pageframes = | |
1068 | global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); | |
76426daf DM |
1069 | file_pageframes_inuse = zcache_last_active_file_pageframes + |
1070 | zcache_last_inactive_file_pageframes; | |
1071 | if (zcache_eph_pageframes > file_pageframes_inuse) | |
1072 | nr_evict = zcache_eph_pageframes - file_pageframes_inuse; | |
1073 | else | |
1074 | nr_evict = 0; | |
faca2ef7 DM |
1075 | while (nr_evict-- > 0) { |
1076 | page = zcache_evict_eph_pageframe(); | |
1077 | if (page == NULL) | |
1078 | break; | |
1079 | zcache_free_page(page); | |
1080 | } | |
1081 | ||
1082 | zcache_last_active_anon_pageframes = | |
1083 | global_page_state(NR_LRU_BASE + LRU_ACTIVE_ANON); | |
1084 | zcache_last_inactive_anon_pageframes = | |
1085 | global_page_state(NR_LRU_BASE + LRU_INACTIVE_ANON); | |
76426daf DM |
1086 | anon_pageframes_inuse = zcache_last_active_anon_pageframes + |
1087 | zcache_last_inactive_anon_pageframes; | |
1088 | if (zcache_pers_pageframes > anon_pageframes_inuse) | |
1089 | nr_writeback = zcache_pers_pageframes - anon_pageframes_inuse; | |
1090 | else | |
1091 | nr_writeback = 0; | |
1092 | while (nr_writeback-- > 0) { | |
1093 | #ifdef CONFIG_ZCACHE_WRITEBACK | |
1094 | int writeback_ret; | |
1095 | writeback_ret = zcache_frontswap_writeback(); | |
1096 | if (writeback_ret == -ENOMEM) | |
1097 | #endif | |
faca2ef7 DM |
1098 | break; |
1099 | } | |
faca2ef7 DM |
1100 | in_progress = false; |
1101 | ||
1102 | skip_evict: | |
1103 | /* resample: has changed, but maybe not all the way yet */ | |
1104 | zcache_last_active_file_pageframes = | |
1105 | global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); | |
1106 | zcache_last_inactive_file_pageframes = | |
1107 | global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); | |
1108 | ret = zcache_eph_pageframes - zcache_last_active_file_pageframes + | |
1109 | zcache_last_inactive_file_pageframes; | |
1110 | if (ret < 0) | |
1111 | ret = 0; | |
1112 | return ret; | |
1113 | } | |
1114 | ||
/* VM shrinker hook so zcache sheds memory under global memory pressure */
static struct shrinker zcache_shrinker = {
	.shrink = shrink_zcache_memory,
	.seeks = DEFAULT_SEEKS,
};
1119 | ||
1120 | /* | |
1121 | * zcache shims between cleancache/frontswap ops and tmem | |
1122 | */ | |
1123 | ||
1124 | /* FIXME rename these core routines to zcache_tmemput etc? */ | |
/*
 * Store @page (@size bytes) into tmem at (cli_id, pool_id, oidp, index).
 * Must be called with interrupts disabled.  While zcache is frozen, the
 * put is converted into a flush (for data consistency) and fails.
 * Returns 0 on success, -ENOMEM if pampd creation failed, -1 on bad pool
 * or while frozen.
 */
int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
				uint32_t index, void *page,
				unsigned int size, bool raw, int ephemeral)
{
	struct tmem_pool *pool;
	struct tmem_handle th;
	int ret = -1;
	void *pampd = NULL;

	BUG_ON(!irqs_disabled());
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (unlikely(pool == NULL))
		goto out;
	if (!zcache_freeze) {
		ret = 0;
		/* compress the page into a pampd, identified by the handle */
		th.client_id = cli_id;
		th.pool_id = pool_id;
		th.oid = *oidp;
		th.index = index;
		pampd = zcache_pampd_create((char *)page, size, raw,
				ephemeral, &th);
		if (pampd == NULL) {
			ret = -ENOMEM;
			if (ephemeral)
				inc_zcache_failed_eph_puts();
			else
				inc_zcache_failed_pers_puts();
		} else {
			if (ramster_enabled)
				ramster_do_preload_flnode(pool);
			ret = tmem_put(pool, oidp, index, 0, pampd);
			if (ret < 0)
				BUG();
		}
		zcache_put_pool(pool);
	} else {
		/* frozen: convert the put into a flush so stale data
		 * cannot survive an unfreeze */
		inc_zcache_put_to_flush();
		if (ramster_enabled)
			ramster_do_preload_flnode(pool);
		if (atomic_read(&pool->obj_count) > 0)
			/* the put fails whether the flush succeeds or not */
			(void)tmem_flush_page(pool, oidp, index);
		zcache_put_pool(pool);
	}
out:
	return ret;
}
1172 | ||
1173 | int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp, | |
1174 | uint32_t index, void *page, | |
1175 | size_t *sizep, bool raw, int get_and_free) | |
1176 | { | |
1177 | struct tmem_pool *pool; | |
1178 | int ret = -1; | |
1179 | bool eph; | |
1180 | ||
1181 | if (!raw) { | |
1182 | BUG_ON(irqs_disabled()); | |
1183 | BUG_ON(in_softirq()); | |
1184 | } | |
1185 | pool = zcache_get_pool_by_id(cli_id, pool_id); | |
1186 | eph = is_ephemeral(pool); | |
1187 | if (likely(pool != NULL)) { | |
1188 | if (atomic_read(&pool->obj_count) > 0) | |
1189 | ret = tmem_get(pool, oidp, index, (char *)(page), | |
1190 | sizep, raw, get_and_free); | |
1191 | zcache_put_pool(pool); | |
1192 | } | |
1193 | WARN_ONCE((!is_ephemeral(pool) && (ret != 0)), | |
1194 | "zcache_get fails on persistent pool, " | |
1195 | "bad things are very likely to happen soon\n"); | |
1196 | #ifdef RAMSTER_TESTING | |
1197 | if (ret != 0 && ret != -1 && !(ret == -EINVAL && is_ephemeral(pool))) | |
1198 | pr_err("TESTING zcache_get tmem_get returns ret=%d\n", ret); | |
1199 | #endif | |
1200 | return ret; | |
1201 | } | |
1202 | ||
1203 | int zcache_flush_page(int cli_id, int pool_id, | |
1204 | struct tmem_oid *oidp, uint32_t index) | |
1205 | { | |
1206 | struct tmem_pool *pool; | |
1207 | int ret = -1; | |
1208 | unsigned long flags; | |
1209 | ||
1210 | local_irq_save(flags); | |
86d7de66 | 1211 | inc_zcache_flush_total(); |
faca2ef7 DM |
1212 | pool = zcache_get_pool_by_id(cli_id, pool_id); |
1213 | if (ramster_enabled) | |
1214 | ramster_do_preload_flnode(pool); | |
1215 | if (likely(pool != NULL)) { | |
1216 | if (atomic_read(&pool->obj_count) > 0) | |
1217 | ret = tmem_flush_page(pool, oidp, index); | |
1218 | zcache_put_pool(pool); | |
1219 | } | |
1220 | if (ret >= 0) | |
86d7de66 | 1221 | inc_zcache_flush_found(); |
faca2ef7 DM |
1222 | local_irq_restore(flags); |
1223 | return ret; | |
1224 | } | |
1225 | ||
1226 | int zcache_flush_object(int cli_id, int pool_id, | |
1227 | struct tmem_oid *oidp) | |
1228 | { | |
1229 | struct tmem_pool *pool; | |
1230 | int ret = -1; | |
1231 | unsigned long flags; | |
1232 | ||
1233 | local_irq_save(flags); | |
86d7de66 | 1234 | inc_zcache_flobj_total(); |
faca2ef7 DM |
1235 | pool = zcache_get_pool_by_id(cli_id, pool_id); |
1236 | if (ramster_enabled) | |
1237 | ramster_do_preload_flnode(pool); | |
1238 | if (likely(pool != NULL)) { | |
1239 | if (atomic_read(&pool->obj_count) > 0) | |
1240 | ret = tmem_flush_object(pool, oidp); | |
1241 | zcache_put_pool(pool); | |
1242 | } | |
1243 | if (ret >= 0) | |
86d7de66 | 1244 | inc_zcache_flobj_found(); |
faca2ef7 DM |
1245 | local_irq_restore(flags); |
1246 | return ret; | |
1247 | } | |
1248 | ||
1249 | static int zcache_client_destroy_pool(int cli_id, int pool_id) | |
1250 | { | |
1251 | struct tmem_pool *pool = NULL; | |
1252 | struct zcache_client *cli = NULL; | |
1253 | int ret = -1; | |
1254 | ||
1255 | if (pool_id < 0) | |
1256 | goto out; | |
1257 | if (cli_id == LOCAL_CLIENT) | |
1258 | cli = &zcache_host; | |
1259 | else if ((unsigned int)cli_id < MAX_CLIENTS) | |
1260 | cli = &zcache_clients[cli_id]; | |
1261 | if (cli == NULL) | |
1262 | goto out; | |
1263 | atomic_inc(&cli->refcount); | |
1264 | pool = cli->tmem_pools[pool_id]; | |
1265 | if (pool == NULL) | |
1266 | goto out; | |
1267 | cli->tmem_pools[pool_id] = NULL; | |
1268 | /* wait for pool activity on other cpus to quiesce */ | |
1269 | while (atomic_read(&pool->refcount) != 0) | |
1270 | ; | |
1271 | atomic_dec(&cli->refcount); | |
1272 | local_bh_disable(); | |
1273 | ret = tmem_destroy_pool(pool); | |
1274 | local_bh_enable(); | |
1275 | kfree(pool); | |
1276 | if (cli_id == LOCAL_CLIENT) | |
1277 | pr_info("%s: destroyed local pool id=%d\n", namestr, pool_id); | |
1278 | else | |
1279 | pr_info("%s: destroyed pool id=%d, client=%d\n", | |
1280 | namestr, pool_id, cli_id); | |
1281 | out: | |
1282 | return ret; | |
1283 | } | |
1284 | ||
1285 | int zcache_new_pool(uint16_t cli_id, uint32_t flags) | |
1286 | { | |
1287 | int poolid = -1; | |
1288 | struct tmem_pool *pool; | |
1289 | struct zcache_client *cli = NULL; | |
1290 | ||
1291 | if (cli_id == LOCAL_CLIENT) | |
1292 | cli = &zcache_host; | |
1293 | else if ((unsigned int)cli_id < MAX_CLIENTS) | |
1294 | cli = &zcache_clients[cli_id]; | |
1295 | if (cli == NULL) | |
1296 | goto out; | |
1297 | atomic_inc(&cli->refcount); | |
1298 | pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC); | |
78110bb8 | 1299 | if (pool == NULL) |
faca2ef7 | 1300 | goto out; |
faca2ef7 DM |
1301 | |
1302 | for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++) | |
1303 | if (cli->tmem_pools[poolid] == NULL) | |
1304 | break; | |
1305 | if (poolid >= MAX_POOLS_PER_CLIENT) { | |
1306 | pr_info("%s: pool creation failed: max exceeded\n", namestr); | |
1307 | kfree(pool); | |
1308 | poolid = -1; | |
1309 | goto out; | |
1310 | } | |
1311 | atomic_set(&pool->refcount, 0); | |
1312 | pool->client = cli; | |
1313 | pool->pool_id = poolid; | |
1314 | tmem_new_pool(pool, flags); | |
1315 | cli->tmem_pools[poolid] = pool; | |
1316 | if (cli_id == LOCAL_CLIENT) | |
1317 | pr_info("%s: created %s local tmem pool, id=%d\n", namestr, | |
1318 | flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", | |
1319 | poolid); | |
1320 | else | |
1321 | pr_info("%s: created %s tmem pool, id=%d, client=%d\n", namestr, | |
1322 | flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", | |
1323 | poolid, cli_id); | |
1324 | out: | |
1325 | if (cli != NULL) | |
1326 | atomic_dec(&cli->refcount); | |
1327 | return poolid; | |
1328 | } | |
1329 | ||
/* convenience wrapper: create a pool owned by the local host client */
static int zcache_local_new_pool(uint32_t flags)
{
	return zcache_new_pool(LOCAL_CLIENT, flags);
}
1334 | ||
f0290de2 | 1335 | int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph) |
faca2ef7 DM |
1336 | { |
1337 | struct tmem_pool *pool; | |
76426daf | 1338 | struct zcache_client *cli; |
faca2ef7 DM |
1339 | uint32_t flags = eph ? 0 : TMEM_POOL_PERSIST; |
1340 | int ret = -1; | |
1341 | ||
1342 | BUG_ON(!ramster_enabled); | |
1343 | if (cli_id == LOCAL_CLIENT) | |
1344 | goto out; | |
1345 | if (pool_id >= MAX_POOLS_PER_CLIENT) | |
1346 | goto out; | |
f0290de2 DC |
1347 | if (cli_id >= MAX_CLIENTS) |
1348 | goto out; | |
1349 | ||
1350 | cli = &zcache_clients[cli_id]; | |
faca2ef7 DM |
1351 | if ((eph && disable_cleancache) || (!eph && disable_frontswap)) { |
1352 | pr_err("zcache_autocreate_pool: pool type disabled\n"); | |
1353 | goto out; | |
1354 | } | |
1355 | if (!cli->allocated) { | |
1356 | if (zcache_new_client(cli_id)) { | |
1357 | pr_err("zcache_autocreate_pool: can't create client\n"); | |
1358 | goto out; | |
1359 | } | |
1360 | cli = &zcache_clients[cli_id]; | |
1361 | } | |
1362 | atomic_inc(&cli->refcount); | |
1363 | pool = cli->tmem_pools[pool_id]; | |
1364 | if (pool != NULL) { | |
1365 | if (pool->persistent && eph) { | |
1366 | pr_err("zcache_autocreate_pool: type mismatch\n"); | |
1367 | goto out; | |
1368 | } | |
1369 | ret = 0; | |
1370 | goto out; | |
1371 | } | |
1372 | pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL); | |
78110bb8 | 1373 | if (pool == NULL) |
faca2ef7 | 1374 | goto out; |
78110bb8 | 1375 | |
faca2ef7 DM |
1376 | atomic_set(&pool->refcount, 0); |
1377 | pool->client = cli; | |
1378 | pool->pool_id = pool_id; | |
1379 | tmem_new_pool(pool, flags); | |
1380 | cli->tmem_pools[pool_id] = pool; | |
1381 | pr_info("%s: AUTOcreated %s tmem poolid=%d, for remote client=%d\n", | |
1382 | namestr, flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", | |
1383 | pool_id, cli_id); | |
1384 | ret = 0; | |
1385 | out: | |
1386 | if (cli != NULL) | |
1387 | atomic_dec(&cli->refcount); | |
1388 | return ret; | |
1389 | } | |
1390 | ||
1391 | /********** | |
1392 | * Two kernel functionalities currently can be layered on top of tmem. | |
1393 | * These are "cleancache" which is used as a second-chance cache for clean | |
1394 | * page cache pages; and "frontswap" which is used for swap pages | |
1395 | * to avoid writes to disk. A generic "shim" is provided here for each | |
1396 | * to translate in-kernel semantics to zcache semantics. | |
1397 | */ | |
1398 | ||
1399 | static void zcache_cleancache_put_page(int pool_id, | |
1400 | struct cleancache_filekey key, | |
1401 | pgoff_t index, struct page *page) | |
1402 | { | |
1403 | u32 ind = (u32) index; | |
1404 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1405 | ||
1406 | if (!disable_cleancache_ignore_nonactive && !PageWasActive(page)) { | |
86d7de66 | 1407 | inc_zcache_eph_nonactive_puts_ignored(); |
faca2ef7 DM |
1408 | return; |
1409 | } | |
1410 | if (likely(ind == index)) | |
1411 | (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, | |
1412 | page, PAGE_SIZE, false, 1); | |
1413 | } | |
1414 | ||
1415 | static int zcache_cleancache_get_page(int pool_id, | |
1416 | struct cleancache_filekey key, | |
1417 | pgoff_t index, struct page *page) | |
1418 | { | |
1419 | u32 ind = (u32) index; | |
1420 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1421 | size_t size; | |
1422 | int ret = -1; | |
1423 | ||
1424 | if (likely(ind == index)) { | |
1425 | ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, | |
1426 | page, &size, false, 0); | |
1427 | BUG_ON(ret >= 0 && size != PAGE_SIZE); | |
1428 | if (ret == 0) | |
1429 | SetPageWasActive(page); | |
1430 | } | |
1431 | return ret; | |
1432 | } | |
1433 | ||
1434 | static void zcache_cleancache_flush_page(int pool_id, | |
1435 | struct cleancache_filekey key, | |
1436 | pgoff_t index) | |
1437 | { | |
1438 | u32 ind = (u32) index; | |
1439 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1440 | ||
1441 | if (likely(ind == index)) | |
1442 | (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind); | |
1443 | } | |
1444 | ||
/* cleancache hook: invalidate every cached page of one inode */
static void zcache_cleancache_flush_inode(int pool_id,
					struct cleancache_filekey key)
{
	/* the filekey is bit-identical to a tmem oid (checked in init_fs) */
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	(void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
}
1452 | ||
/* cleancache hook: filesystem unmounted — destroy its whole tmem pool */
static void zcache_cleancache_flush_fs(int pool_id)
{
	if (pool_id >= 0)
		(void)zcache_client_destroy_pool(LOCAL_CLIENT, pool_id);
}
1458 | ||
/* cleancache hook: new fs mounted — create its ephemeral pool, return id */
static int zcache_cleancache_init_fs(size_t pagesize)
{
	/* cleancache keys are overlaid onto tmem oids; sizes must match */
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_local_new_pool(0);
}
1466 | ||
/* cleancache hook for cluster-shared filesystems (uuid is ignored) */
static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
{
	/* shared pools are unsupported and map to private */
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_local_new_pool(0);
}
1475 | ||
/* glue table translating cleancache callbacks into zcache/tmem operations */
static struct cleancache_ops zcache_cleancache_ops = {
	.put_page = zcache_cleancache_put_page,
	.get_page = zcache_cleancache_get_page,
	.invalidate_page = zcache_cleancache_flush_page,
	.invalidate_inode = zcache_cleancache_flush_inode,
	.invalidate_fs = zcache_cleancache_flush_fs,
	.init_shared_fs = zcache_cleancache_init_shared_fs,
	.init_fs = zcache_cleancache_init_fs
};
1485 | ||
/* register zcache as the cleancache backend; returns any previous ops
 * so the caller can warn about an override */
struct cleancache_ops zcache_cleancache_register_ops(void)
{
	struct cleancache_ops old_ops =
		cleancache_register_ops(&zcache_cleancache_ops);

	return old_ops;
}
1493 | ||
1494 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | |
1495 | static int zcache_frontswap_poolid __read_mostly = -1; | |
1496 | ||
1497 | /* | |
1498 | * Swizzling increases objects per swaptype, increasing tmem concurrency | |
1499 | * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS | |
1500 | * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from | |
1501 | * frontswap_get_page(), but has side-effects. Hence using 8. | |
1502 | */ | |
#define SWIZ_BITS 8
#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
/* arguments parenthesized: the originals expanded _type/_ind bare, which
 * miscomputes for arguments built from lower-precedence operators */
#define _oswiz(_type, _ind) (((_type) << SWIZ_BITS) | ((_ind) & SWIZ_MASK))
#define iswiz(_ind) ((_ind) >> SWIZ_BITS)
1507 | ||
/* pack (swap type, low index bits) into word 0 of an otherwise-zero oid */
static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid oid = { .oid = { 0 } };
	oid.oid[0] = _oswiz(type, ind);
	return oid;
}
1514 | ||
76426daf | 1515 | #ifdef CONFIG_ZCACHE_WRITEBACK |
faca2ef7 DM |
/* inverse of oswiz()/iswiz(): recover the swap type and page offset
 * from a swizzled (oid, index) pair */
static void unswiz(struct tmem_oid oid, u32 index,
				unsigned *type, pgoff_t *offset)
{
	*type = (unsigned)(oid.oid[0] >> SWIZ_BITS);
	*offset = (pgoff_t)((index << SWIZ_BITS) |
			(oid.oid[0] & SWIZ_MASK));
}
7892e560 | 1523 | #endif |
faca2ef7 DM |
1524 | |
1525 | static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, | |
1526 | struct page *page) | |
1527 | { | |
1528 | u64 ind64 = (u64)offset; | |
1529 | u32 ind = (u32)offset; | |
1530 | struct tmem_oid oid = oswiz(type, ind); | |
1531 | int ret = -1; | |
1532 | unsigned long flags; | |
faca2ef7 DM |
1533 | |
1534 | BUG_ON(!PageLocked(page)); | |
1535 | if (!disable_frontswap_ignore_nonactive && !PageWasActive(page)) { | |
86d7de66 | 1536 | inc_zcache_pers_nonactive_puts_ignored(); |
faca2ef7 DM |
1537 | ret = -ERANGE; |
1538 | goto out; | |
1539 | } | |
1540 | if (likely(ind64 == ind)) { | |
1541 | local_irq_save(flags); | |
1542 | ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid, | |
1543 | &oid, iswiz(ind), | |
1544 | page, PAGE_SIZE, false, 0); | |
1545 | local_irq_restore(flags); | |
1546 | } | |
1547 | out: | |
1548 | return ret; | |
1549 | } | |
1550 | ||
1551 | /* returns 0 if the page was successfully gotten from frontswap, -1 if | |
1552 | * was not present (should never happen!) */ | |
1553 | static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, | |
1554 | struct page *page) | |
1555 | { | |
1556 | u64 ind64 = (u64)offset; | |
1557 | u32 ind = (u32)offset; | |
1558 | struct tmem_oid oid = oswiz(type, ind); | |
1559 | size_t size; | |
1560 | int ret = -1, get_and_free; | |
1561 | ||
1562 | if (frontswap_has_exclusive_gets) | |
1563 | get_and_free = 1; | |
1564 | else | |
1565 | get_and_free = -1; | |
1566 | BUG_ON(!PageLocked(page)); | |
1567 | if (likely(ind64 == ind)) { | |
1568 | ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid, | |
1569 | &oid, iswiz(ind), | |
1570 | page, &size, false, get_and_free); | |
1571 | BUG_ON(ret >= 0 && size != PAGE_SIZE); | |
1572 | } | |
1573 | return ret; | |
1574 | } | |
1575 | ||
1576 | /* flush a single page from frontswap */ | |
1577 | static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset) | |
1578 | { | |
1579 | u64 ind64 = (u64)offset; | |
1580 | u32 ind = (u32)offset; | |
1581 | struct tmem_oid oid = oswiz(type, ind); | |
1582 | ||
1583 | if (likely(ind64 == ind)) | |
1584 | (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid, | |
1585 | &oid, iswiz(ind)); | |
1586 | } | |
1587 | ||
1588 | /* flush all pages from the passed swaptype */ | |
1589 | static void zcache_frontswap_flush_area(unsigned type) | |
1590 | { | |
1591 | struct tmem_oid oid; | |
1592 | int ind; | |
1593 | ||
1594 | for (ind = SWIZ_MASK; ind >= 0; ind--) { | |
1595 | oid = oswiz(type, ind); | |
1596 | (void)zcache_flush_object(LOCAL_CLIENT, | |
1597 | zcache_frontswap_poolid, &oid); | |
1598 | } | |
1599 | } | |
1600 | ||
/* frontswap hook: lazily create the single persistent pool on first use */
static void zcache_frontswap_init(unsigned ignored)
{
	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
	if (zcache_frontswap_poolid < 0)
		zcache_frontswap_poolid =
			zcache_local_new_pool(TMEM_POOL_PERSIST);
}
1608 | ||
/* glue table translating frontswap callbacks into zcache/tmem operations */
static struct frontswap_ops zcache_frontswap_ops = {
	.store = zcache_frontswap_put_page,
	.load = zcache_frontswap_get_page,
	.invalidate_page = zcache_frontswap_flush_page,
	.invalidate_area = zcache_frontswap_flush_area,
	.init = zcache_frontswap_init
};
1616 | ||
/* register zcache as the frontswap backend; returns any previous ops
 * so the caller can warn about an override */
struct frontswap_ops zcache_frontswap_register_ops(void)
{
	struct frontswap_ops old_ops =
		frontswap_register_ops(&zcache_frontswap_ops);

	return old_ops;
}
1624 | ||
1625 | /* | |
1626 | * zcache initialization | |
1627 | * NOTE FOR NOW zcache or ramster MUST BE PROVIDED AS A KERNEL BOOT PARAMETER | |
1628 | * OR NOTHING HAPPENS! | |
1629 | */ | |
1630 | ||
/* "zcache": enable zcache with the default compressor */
static int __init enable_zcache(char *s)
{
	zcache_enabled = true;
	return 1;
}
__setup("zcache", enable_zcache);

/* "ramster": enable zcache plus the ramster clustering layer (if built) */
static int __init enable_ramster(char *s)
{
	zcache_enabled = true;
#ifdef CONFIG_RAMSTER
	ramster_enabled = true;
#endif
	return 1;
}
__setup("ramster", enable_ramster);

/* allow independent dynamic disabling of cleancache and frontswap */

static int __init no_cleancache(char *s)
{
	disable_cleancache = true;
	return 1;
}

__setup("nocleancache", no_cleancache);

static int __init no_frontswap(char *s)
{
	disable_frontswap = true;
	return 1;
}

__setup("nofrontswap", no_frontswap);

/* "nofrontswapexclusivegets": frontswap loads leave the zpage in zcache */
static int __init no_frontswap_exclusive_gets(char *s)
{
	frontswap_has_exclusive_gets = false;
	return 1;
}

__setup("nofrontswapexclusivegets", no_frontswap_exclusive_gets);

/* accept frontswap puts even for pages never on the active LRU */
static int __init no_frontswap_ignore_nonactive(char *s)
{
	disable_frontswap_ignore_nonactive = true;
	return 1;
}

__setup("nofrontswapignorenonactive", no_frontswap_ignore_nonactive);

/* accept cleancache puts even for pages never on the active LRU */
static int __init no_cleancache_ignore_nonactive(char *s)
{
	disable_cleancache_ignore_nonactive = true;
	return 1;
}

__setup("nocleancacheignorenonactive", no_cleancache_ignore_nonactive);

/* "zcache=<alg>": enable zcache with an explicit crypto compressor name */
static int __init enable_zcache_compressor(char *s)
{
	strlcpy(zcache_comp_name, s, sizeof(zcache_comp_name));
	zcache_enabled = true;
	return 1;
}
__setup("zcache=", enable_zcache_compressor);
1697 | ||
1698 | ||
/*
 * Pick and validate the compression algorithm, then allocate the percpu
 * transform pointers.  Returns 0 on success, 1 on failure.  Note that
 * crypto_has_comp() returns NONZERO when the algorithm is available.
 */
static int __init zcache_comp_init(void)
{
	int ret = 0;

	/* check crypto algorithm */
	if (*zcache_comp_name != '\0') {
		ret = crypto_has_comp(zcache_comp_name, 0, 0);
		if (!ret)
			pr_info("zcache: %s not supported\n",
					zcache_comp_name);
	}
	/* fall back to lzo if no (working) name was given on the cmdline */
	if (!ret)
		strcpy(zcache_comp_name, "lzo");
	ret = crypto_has_comp(zcache_comp_name, 0, 0);
	if (!ret) {
		ret = 1;
		goto out;
	}
	pr_info("zcache: using %s compressor\n", zcache_comp_name);

	/* alloc percpu transforms */
	ret = 0;
	zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	if (!zcache_comp_pcpu_tfms)
		ret = 1;
out:
	return ret;
}
1727 | ||
1728 | static int __init zcache_init(void) | |
1729 | { | |
1730 | int ret = 0; | |
1731 | ||
1732 | if (ramster_enabled) { | |
1733 | namestr = "ramster"; | |
1734 | ramster_register_pamops(&zcache_pamops); | |
1735 | } | |
1736 | #ifdef CONFIG_DEBUG_FS | |
1737 | zcache_debugfs_init(); | |
1738 | #endif | |
1739 | if (zcache_enabled) { | |
1740 | unsigned int cpu; | |
1741 | ||
1742 | tmem_register_hostops(&zcache_hostops); | |
1743 | tmem_register_pamops(&zcache_pamops); | |
1744 | ret = register_cpu_notifier(&zcache_cpu_notifier_block); | |
1745 | if (ret) { | |
1746 | pr_err("%s: can't register cpu notifier\n", namestr); | |
1747 | goto out; | |
1748 | } | |
1749 | ret = zcache_comp_init(); | |
1750 | if (ret) { | |
1751 | pr_err("%s: compressor initialization failed\n", | |
1752 | namestr); | |
1753 | goto out; | |
1754 | } | |
1755 | for_each_online_cpu(cpu) { | |
1756 | void *pcpu = (void *)(long)cpu; | |
1757 | zcache_cpu_notifier(&zcache_cpu_notifier_block, | |
1758 | CPU_UP_PREPARE, pcpu); | |
1759 | } | |
1760 | } | |
1761 | zcache_objnode_cache = kmem_cache_create("zcache_objnode", | |
1762 | sizeof(struct tmem_objnode), 0, 0, NULL); | |
1763 | zcache_obj_cache = kmem_cache_create("zcache_obj", | |
1764 | sizeof(struct tmem_obj), 0, 0, NULL); | |
1765 | ret = zcache_new_client(LOCAL_CLIENT); | |
1766 | if (ret) { | |
1767 | pr_err("%s: can't create client\n", namestr); | |
1768 | goto out; | |
1769 | } | |
1770 | zbud_init(); | |
1771 | if (zcache_enabled && !disable_cleancache) { | |
1772 | struct cleancache_ops old_ops; | |
1773 | ||
1774 | register_shrinker(&zcache_shrinker); | |
1775 | old_ops = zcache_cleancache_register_ops(); | |
1776 | pr_info("%s: cleancache enabled using kernel transcendent " | |
1777 | "memory and compression buddies\n", namestr); | |
67e2cba4 | 1778 | #ifdef CONFIG_ZCACHE_DEBUG |
faca2ef7 DM |
1779 | pr_info("%s: cleancache: ignorenonactive = %d\n", |
1780 | namestr, !disable_cleancache_ignore_nonactive); | |
1781 | #endif | |
1782 | if (old_ops.init_fs != NULL) | |
1783 | pr_warn("%s: cleancache_ops overridden\n", namestr); | |
1784 | } | |
1785 | if (zcache_enabled && !disable_frontswap) { | |
1786 | struct frontswap_ops old_ops; | |
1787 | ||
1788 | old_ops = zcache_frontswap_register_ops(); | |
1789 | if (frontswap_has_exclusive_gets) | |
1790 | frontswap_tmem_exclusive_gets(true); | |
1791 | pr_info("%s: frontswap enabled using kernel transcendent " | |
1792 | "memory and compression buddies\n", namestr); | |
67e2cba4 | 1793 | #ifdef CONFIG_ZCACHE_DEBUG |
faca2ef7 DM |
1794 | pr_info("%s: frontswap: excl gets = %d active only = %d\n", |
1795 | namestr, frontswap_has_exclusive_gets, | |
1796 | !disable_frontswap_ignore_nonactive); | |
1797 | #endif | |
1798 | if (old_ops.init != NULL) | |
1799 | pr_warn("%s: frontswap_ops overridden\n", namestr); | |
1800 | } | |
1801 | if (ramster_enabled) | |
1802 | ramster_init(!disable_cleancache, !disable_frontswap, | |
1803 | frontswap_has_exclusive_gets); | |
1804 | out: | |
1805 | return ret; | |
1806 | } | |
1807 | ||
1808 | late_initcall(zcache_init); |