1 /*
2  * zcache-main.c
3 *
4 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
5 * Copyright (c) 2010,2011, Nitin Gupta
6 *
7 * Zcache provides an in-kernel "host implementation" for transcendent memory
8 * ("tmem") and, thus indirectly, for cleancache and frontswap. Zcache uses
9 * lzo1x compression to improve density and an embedded allocator called
10 * "zbud" which "buddies" two compressed pages semi-optimally in each physical
11 * pageframe. Zbud is integrally tied into tmem to allow pageframes to
12 * be "reclaimed" efficiently.
13 */
14
15 #include <linux/module.h>
16 #include <linux/cpu.h>
17 #include <linux/highmem.h>
18 #include <linux/list.h>
19 #include <linux/slab.h>
20 #include <linux/spinlock.h>
21 #include <linux/types.h>
22 #include <linux/atomic.h>
23 #include <linux/math64.h>
24 #include <linux/crypto.h>
25 #include <linux/swap.h>
26 #include <linux/swapops.h>
27 #include <linux/pagemap.h>
28 #include <linux/writeback.h>
29
30 #include <linux/cleancache.h>
31 #include <linux/frontswap.h>
32 #include "tmem.h"
33 #include "zcache.h"
34 #include "zbud.h"
35 #include "ramster.h"
36 #ifdef CONFIG_RAMSTER
37 static int ramster_enabled;
38 #else
39 #define ramster_enabled 0
40 #endif
41
42 #ifndef __PG_WAS_ACTIVE
43 static inline bool PageWasActive(struct page *page)
44 {
45 return true;
46 }
47
48 static inline void SetPageWasActive(struct page *page)
49 {
50 }
51 #endif
52
53 #ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS
54 static bool frontswap_has_exclusive_gets __read_mostly = true;
55 #else
56 static bool frontswap_has_exclusive_gets __read_mostly;
57 static inline void frontswap_tmem_exclusive_gets(bool b)
58 {
59 }
60 #endif
61
62 /* enable (or fix code) when Seth's patches are accepted upstream */
63 #define zcache_writeback_enabled 0
64
65 static int zcache_enabled __read_mostly;
66 static int disable_cleancache __read_mostly;
67 static int disable_frontswap __read_mostly;
68 static int disable_frontswap_ignore_nonactive __read_mostly;
69 static int disable_cleancache_ignore_nonactive __read_mostly;
70 static char *namestr __read_mostly = "zcache";
71
72 #define ZCACHE_GFP_MASK \
73 (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
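/*
 * Clarifying note (not in the original source): this mask omits __GFP_WAIT,
 * so zcache allocations never sleep and simply fail fast under memory
 * pressure -- no retries, no allocation-failure warnings, and no dipping
 * into emergency reserves.
 */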
74
75 MODULE_LICENSE("GPL");
76
77 /* crypto API for zcache */
78 #define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME
79 static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly;
80 static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly;
81
82 enum comp_op {
83 ZCACHE_COMPOP_COMPRESS,
84 ZCACHE_COMPOP_DECOMPRESS
85 };
86
87 static inline int zcache_comp_op(enum comp_op op,
88 const u8 *src, unsigned int slen,
89 u8 *dst, unsigned int *dlen)
90 {
91 struct crypto_comp *tfm;
92 int ret = -1;
93
94 BUG_ON(!zcache_comp_pcpu_tfms);
95 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
96 BUG_ON(!tfm);
97 switch (op) {
98 case ZCACHE_COMPOP_COMPRESS:
99 ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
100 break;
101 case ZCACHE_COMPOP_DECOMPRESS:
102 ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
103 break;
104 default:
105 ret = -EINVAL;
106 }
107 put_cpu();
108 return ret;
109 }
110
111 /*
112 * policy parameters
113 */
114
115 /*
116 * byte count defining poor compression; pages with greater zsize will be
117 * rejected
118 */
119 static unsigned int zbud_max_zsize __read_mostly = (PAGE_SIZE / 8) * 7;
120 /*
121 * byte count defining poor *mean* compression; pages with greater zsize
122 * will be rejected until sufficient better-compressed pages are accepted
123 * driving the mean below this threshold
124 */
125 static unsigned int zbud_max_mean_zsize __read_mostly = (PAGE_SIZE / 8) * 5;
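/*
 * Illustrative numbers, assuming PAGE_SIZE == 4096: zbud_max_zsize is
 * 3584 bytes and zbud_max_mean_zsize is 2560 bytes, i.e. a single zpage
 * may occupy up to 7/8 of a pageframe, but the running mean compressed
 * size must stay at or below 5/8 of a pageframe.
 */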
126
127 /*
128  * for now, use named slabs so we can easily track usage; later we can
129 * either just use kmalloc, or perhaps add a slab-like allocator
130 * to more carefully manage total memory utilization
131 */
132 static struct kmem_cache *zcache_objnode_cache;
133 static struct kmem_cache *zcache_obj_cache;
134
135 static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };
136
137 /* we try to keep these statistics SMP-consistent */
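/*
 * Note on the pattern below (added for clarity): each statistic pairs an
 * atomic_t/atomic_long_t, which is the authoritative counter, with a plain
 * shadow variable that is refreshed from atomic_*_return() on every update;
 * the shadow is what debugfs and zcache_dump() report.
 */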
138 static ssize_t zcache_obj_count;
139 static atomic_t zcache_obj_atomic = ATOMIC_INIT(0);
140 static ssize_t zcache_obj_count_max;
141 static ssize_t zcache_objnode_count;
142 static atomic_t zcache_objnode_atomic = ATOMIC_INIT(0);
143 static ssize_t zcache_objnode_count_max;
144 static u64 zcache_eph_zbytes;
145 static atomic_long_t zcache_eph_zbytes_atomic = ATOMIC_INIT(0);
146 static u64 zcache_eph_zbytes_max;
147 static u64 zcache_pers_zbytes;
148 static atomic_long_t zcache_pers_zbytes_atomic = ATOMIC_INIT(0);
149 static u64 zcache_pers_zbytes_max;
150 static ssize_t zcache_eph_pageframes;
151 static atomic_t zcache_eph_pageframes_atomic = ATOMIC_INIT(0);
152 static ssize_t zcache_eph_pageframes_max;
153 static ssize_t zcache_pers_pageframes;
154 static atomic_t zcache_pers_pageframes_atomic = ATOMIC_INIT(0);
155 static ssize_t zcache_pers_pageframes_max;
156 static ssize_t zcache_pageframes_alloced;
157 static atomic_t zcache_pageframes_alloced_atomic = ATOMIC_INIT(0);
158 static ssize_t zcache_pageframes_freed;
159 static atomic_t zcache_pageframes_freed_atomic = ATOMIC_INIT(0);
160 static ssize_t zcache_eph_zpages;
162 static atomic_t zcache_eph_zpages_atomic = ATOMIC_INIT(0);
163 static ssize_t zcache_eph_zpages_max;
164 static ssize_t zcache_pers_zpages;
165 static atomic_t zcache_pers_zpages_atomic = ATOMIC_INIT(0);
166 static ssize_t zcache_pers_zpages_max;
167
168 /* but for the rest of these, counting races are ok */
169 static ssize_t zcache_flush_total;
170 static ssize_t zcache_flush_found;
171 static ssize_t zcache_flobj_total;
172 static ssize_t zcache_flobj_found;
173 static ssize_t zcache_failed_eph_puts;
174 static ssize_t zcache_failed_pers_puts;
175 static ssize_t zcache_failed_getfreepages;
176 static ssize_t zcache_failed_alloc;
177 static ssize_t zcache_put_to_flush;
178 static ssize_t zcache_compress_poor;
179 static ssize_t zcache_mean_compress_poor;
180 static ssize_t zcache_eph_ate_tail;
181 static ssize_t zcache_eph_ate_tail_failed;
182 static ssize_t zcache_pers_ate_eph;
183 static ssize_t zcache_pers_ate_eph_failed;
184 static ssize_t zcache_evicted_eph_zpages;
185 static ssize_t zcache_evicted_eph_pageframes;
186 static ssize_t zcache_last_active_file_pageframes;
187 static ssize_t zcache_last_inactive_file_pageframes;
188 static ssize_t zcache_last_active_anon_pageframes;
189 static ssize_t zcache_last_inactive_anon_pageframes;
190 static ssize_t zcache_eph_nonactive_puts_ignored;
191 static ssize_t zcache_pers_nonactive_puts_ignored;
192 static ssize_t zcache_writtenback_pages;
193 static ssize_t zcache_outstanding_writeback_pages;
194
195 #ifdef CONFIG_DEBUG_FS
196 #include <linux/debugfs.h>
197 #define zdfs debugfs_create_size_t
198 #define zdfs64 debugfs_create_u64
199 static int zcache_debugfs_init(void)
200 {
201 struct dentry *root = debugfs_create_dir("zcache", NULL);
202 if (root == NULL)
203 return -ENXIO;
204
205 zdfs("obj_count", S_IRUGO, root, &zcache_obj_count);
206 zdfs("obj_count_max", S_IRUGO, root, &zcache_obj_count_max);
207 zdfs("objnode_count", S_IRUGO, root, &zcache_objnode_count);
208 zdfs("objnode_count_max", S_IRUGO, root, &zcache_objnode_count_max);
209 zdfs("flush_total", S_IRUGO, root, &zcache_flush_total);
210 zdfs("flush_found", S_IRUGO, root, &zcache_flush_found);
211 zdfs("flobj_total", S_IRUGO, root, &zcache_flobj_total);
212 zdfs("flobj_found", S_IRUGO, root, &zcache_flobj_found);
213 zdfs("failed_eph_puts", S_IRUGO, root, &zcache_failed_eph_puts);
214 zdfs("failed_pers_puts", S_IRUGO, root, &zcache_failed_pers_puts);
215 zdfs("failed_get_free_pages", S_IRUGO, root,
216 &zcache_failed_getfreepages);
217 zdfs("failed_alloc", S_IRUGO, root, &zcache_failed_alloc);
218 zdfs("put_to_flush", S_IRUGO, root, &zcache_put_to_flush);
219 zdfs("compress_poor", S_IRUGO, root, &zcache_compress_poor);
220 zdfs("mean_compress_poor", S_IRUGO, root, &zcache_mean_compress_poor);
221 zdfs("eph_ate_tail", S_IRUGO, root, &zcache_eph_ate_tail);
222 zdfs("eph_ate_tail_failed", S_IRUGO, root, &zcache_eph_ate_tail_failed);
223 zdfs("pers_ate_eph", S_IRUGO, root, &zcache_pers_ate_eph);
224 zdfs("pers_ate_eph_failed", S_IRUGO, root, &zcache_pers_ate_eph_failed);
225 zdfs("evicted_eph_zpages", S_IRUGO, root, &zcache_evicted_eph_zpages);
226 zdfs("evicted_eph_pageframes", S_IRUGO, root,
227 &zcache_evicted_eph_pageframes);
228 zdfs("eph_pageframes", S_IRUGO, root, &zcache_eph_pageframes);
229 zdfs("eph_pageframes_max", S_IRUGO, root, &zcache_eph_pageframes_max);
230 zdfs("pers_pageframes", S_IRUGO, root, &zcache_pers_pageframes);
231 zdfs("pers_pageframes_max", S_IRUGO, root, &zcache_pers_pageframes_max);
232 zdfs("eph_zpages", S_IRUGO, root, &zcache_eph_zpages);
233 zdfs("eph_zpages_max", S_IRUGO, root, &zcache_eph_zpages_max);
234 zdfs("pers_zpages", S_IRUGO, root, &zcache_pers_zpages);
235 zdfs("pers_zpages_max", S_IRUGO, root, &zcache_pers_zpages_max);
236 zdfs("last_active_file_pageframes", S_IRUGO, root,
237 &zcache_last_active_file_pageframes);
238 zdfs("last_inactive_file_pageframes", S_IRUGO, root,
239 &zcache_last_inactive_file_pageframes);
240 zdfs("last_active_anon_pageframes", S_IRUGO, root,
241 &zcache_last_active_anon_pageframes);
242 zdfs("last_inactive_anon_pageframes", S_IRUGO, root,
243 &zcache_last_inactive_anon_pageframes);
244 zdfs("eph_nonactive_puts_ignored", S_IRUGO, root,
245 &zcache_eph_nonactive_puts_ignored);
246 zdfs("pers_nonactive_puts_ignored", S_IRUGO, root,
247 &zcache_pers_nonactive_puts_ignored);
248 zdfs64("eph_zbytes", S_IRUGO, root, &zcache_eph_zbytes);
249 zdfs64("eph_zbytes_max", S_IRUGO, root, &zcache_eph_zbytes_max);
250 zdfs64("pers_zbytes", S_IRUGO, root, &zcache_pers_zbytes);
251 zdfs64("pers_zbytes_max", S_IRUGO, root, &zcache_pers_zbytes_max);
252 zdfs("outstanding_writeback_pages", S_IRUGO, root,
253 &zcache_outstanding_writeback_pages);
254 zdfs("writtenback_pages", S_IRUGO, root, &zcache_writtenback_pages);
255 return 0;
256 }
257 #undef zdfs
258 #undef zdfs64
259 #endif
260
261 #define ZCACHE_DEBUG
262 #ifdef ZCACHE_DEBUG
263 /* developers can call this in case of ooms, e.g. to find memory leaks */
264 void zcache_dump(void)
265 {
266 pr_info("zcache: obj_count=%zd\n", zcache_obj_count);
267 pr_info("zcache: obj_count_max=%zd\n", zcache_obj_count_max);
268 pr_info("zcache: objnode_count=%zd\n", zcache_objnode_count);
269 pr_info("zcache: objnode_count_max=%zd\n", zcache_objnode_count_max);
270 pr_info("zcache: flush_total=%zd\n", zcache_flush_total);
271 pr_info("zcache: flush_found=%zd\n", zcache_flush_found);
272 pr_info("zcache: flobj_total=%zd\n", zcache_flobj_total);
273 pr_info("zcache: flobj_found=%zd\n", zcache_flobj_found);
274 pr_info("zcache: failed_eph_puts=%zd\n", zcache_failed_eph_puts);
275 pr_info("zcache: failed_pers_puts=%zd\n", zcache_failed_pers_puts);
276 pr_info("zcache: failed_get_free_pages=%zd\n",
277 zcache_failed_getfreepages);
278 pr_info("zcache: failed_alloc=%zd\n", zcache_failed_alloc);
279 pr_info("zcache: put_to_flush=%zd\n", zcache_put_to_flush);
280 pr_info("zcache: compress_poor=%zd\n", zcache_compress_poor);
281 pr_info("zcache: mean_compress_poor=%zd\n",
282 zcache_mean_compress_poor);
283 pr_info("zcache: eph_ate_tail=%zd\n", zcache_eph_ate_tail);
284 pr_info("zcache: eph_ate_tail_failed=%zd\n",
285 zcache_eph_ate_tail_failed);
286 pr_info("zcache: pers_ate_eph=%zd\n", zcache_pers_ate_eph);
287 pr_info("zcache: pers_ate_eph_failed=%zd\n",
288 zcache_pers_ate_eph_failed);
289 pr_info("zcache: evicted_eph_zpages=%zd\n", zcache_evicted_eph_zpages);
290 pr_info("zcache: evicted_eph_pageframes=%zd\n",
291 zcache_evicted_eph_pageframes);
292 pr_info("zcache: eph_pageframes=%zd\n", zcache_eph_pageframes);
293 pr_info("zcache: eph_pageframes_max=%zd\n", zcache_eph_pageframes_max);
294 pr_info("zcache: pers_pageframes=%zd\n", zcache_pers_pageframes);
295 pr_info("zcache: pers_pageframes_max=%zd\n",
296 zcache_pers_pageframes_max);
297 pr_info("zcache: eph_zpages=%zd\n", zcache_eph_zpages);
298 pr_info("zcache: eph_zpages_max=%zd\n", zcache_eph_zpages_max);
299 pr_info("zcache: pers_zpages=%zd\n", zcache_pers_zpages);
300 pr_info("zcache: pers_zpages_max=%zd\n", zcache_pers_zpages_max);
301 pr_info("zcache: last_active_file_pageframes=%zd\n",
302 zcache_last_active_file_pageframes);
303 pr_info("zcache: last_inactive_file_pageframes=%zd\n",
304 zcache_last_inactive_file_pageframes);
305 pr_info("zcache: last_active_anon_pageframes=%zd\n",
306 zcache_last_active_anon_pageframes);
307 pr_info("zcache: last_inactive_anon_pageframes=%zd\n",
308 zcache_last_inactive_anon_pageframes);
309 pr_info("zcache: eph_nonactive_puts_ignored=%zd\n",
310 zcache_eph_nonactive_puts_ignored);
311 pr_info("zcache: pers_nonactive_puts_ignored=%zd\n",
312 zcache_pers_nonactive_puts_ignored);
313 pr_info("zcache: eph_zbytes=%llu\n",
314 zcache_eph_zbytes);
315 pr_info("zcache: eph_zbytes_max=%llu\n",
316 zcache_eph_zbytes_max);
317 pr_info("zcache: pers_zbytes=%llu\n",
318 zcache_pers_zbytes);
319 pr_info("zcache: pers_zbytes_max=%llu\n",
320 zcache_pers_zbytes_max);
321 pr_info("zcache: outstanding_writeback_pages=%zd\n",
322 zcache_outstanding_writeback_pages);
323 pr_info("zcache: writtenback_pages=%zd\n", zcache_writtenback_pages);
324 }
325 #endif
326
327 /*
328 * zcache core code starts here
329 */
330
331 static struct zcache_client zcache_host;
332 static struct zcache_client zcache_clients[MAX_CLIENTS];
333
334 static inline bool is_local_client(struct zcache_client *cli)
335 {
336 return cli == &zcache_host;
337 }
338
339 static struct zcache_client *zcache_get_client_by_id(uint16_t cli_id)
340 {
341 struct zcache_client *cli = &zcache_host;
342
343 if (cli_id != LOCAL_CLIENT) {
344 if (cli_id >= MAX_CLIENTS)
345 goto out;
346 cli = &zcache_clients[cli_id];
347 }
348 out:
349 return cli;
350 }
351
352 /*
353 * Tmem operations assume the poolid implies the invoking client.
354 * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
355 * RAMster has each client numbered by cluster node, and a KVM version
356 * of zcache would have one client per guest and each client might
357 * have a poolid==N.
358 */
359 struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
360 {
361 struct tmem_pool *pool = NULL;
362 struct zcache_client *cli = NULL;
363
364 cli = zcache_get_client_by_id(cli_id);
365 if (cli == NULL)
366 goto out;
367 if (!is_local_client(cli))
368 atomic_inc(&cli->refcount);
369 if (poolid < MAX_POOLS_PER_CLIENT) {
370 pool = cli->tmem_pools[poolid];
371 if (pool != NULL)
372 atomic_inc(&pool->refcount);
373 }
374 out:
375 return pool;
376 }
377
378 void zcache_put_pool(struct tmem_pool *pool)
379 {
380 struct zcache_client *cli = NULL;
381
382 if (pool == NULL)
383 BUG();
384 cli = pool->client;
385 atomic_dec(&pool->refcount);
386 if (!is_local_client(cli))
387 atomic_dec(&cli->refcount);
388 }
389
390 int zcache_new_client(uint16_t cli_id)
391 {
392 struct zcache_client *cli;
393 int ret = -1;
394
395 cli = zcache_get_client_by_id(cli_id);
396 if (cli == NULL)
397 goto out;
398 if (cli->allocated)
399 goto out;
400 cli->allocated = 1;
401 ret = 0;
402 out:
403 return ret;
404 }
405
406 /*
407 * zcache implementation for tmem host ops
408 */
409
410 static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
411 {
412 struct tmem_objnode *objnode = NULL;
413 struct zcache_preload *kp;
414 int i;
415
416 kp = &__get_cpu_var(zcache_preloads);
417 for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
418 objnode = kp->objnodes[i];
419 if (objnode != NULL) {
420 kp->objnodes[i] = NULL;
421 break;
422 }
423 }
424 BUG_ON(objnode == NULL);
425 zcache_objnode_count = atomic_inc_return(&zcache_objnode_atomic);
426 if (zcache_objnode_count > zcache_objnode_count_max)
427 zcache_objnode_count_max = zcache_objnode_count;
428 return objnode;
429 }
430
431 static void zcache_objnode_free(struct tmem_objnode *objnode,
432 struct tmem_pool *pool)
433 {
434 zcache_objnode_count =
435 atomic_dec_return(&zcache_objnode_atomic);
436 BUG_ON(zcache_objnode_count < 0);
437 kmem_cache_free(zcache_objnode_cache, objnode);
438 }
439
440 static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
441 {
442 struct tmem_obj *obj = NULL;
443 struct zcache_preload *kp;
444
445 kp = &__get_cpu_var(zcache_preloads);
446 obj = kp->obj;
447 BUG_ON(obj == NULL);
448 kp->obj = NULL;
449 zcache_obj_count = atomic_inc_return(&zcache_obj_atomic);
450 if (zcache_obj_count > zcache_obj_count_max)
451 zcache_obj_count_max = zcache_obj_count;
452 return obj;
453 }
454
455 static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
456 {
457 zcache_obj_count =
458 atomic_dec_return(&zcache_obj_atomic);
459 BUG_ON(zcache_obj_count < 0);
460 kmem_cache_free(zcache_obj_cache, obj);
461 }
462
463 static struct tmem_hostops zcache_hostops = {
464 .obj_alloc = zcache_obj_alloc,
465 .obj_free = zcache_obj_free,
466 .objnode_alloc = zcache_objnode_alloc,
467 .objnode_free = zcache_objnode_free,
468 };
469
470 static struct page *zcache_alloc_page(void)
471 {
472 struct page *page = alloc_page(ZCACHE_GFP_MASK);
473
474 if (page != NULL)
475 zcache_pageframes_alloced =
476 atomic_inc_return(&zcache_pageframes_alloced_atomic);
477 return page;
478 }
479
480 static void zcache_free_page(struct page *page)
481 {
482 long curr_pageframes;
483 static long max_pageframes, min_pageframes;
484
485 if (page == NULL)
486 BUG();
487 __free_page(page);
488 zcache_pageframes_freed =
489 atomic_inc_return(&zcache_pageframes_freed_atomic);
490 curr_pageframes = zcache_pageframes_alloced -
491 atomic_read(&zcache_pageframes_freed_atomic) -
492 atomic_read(&zcache_eph_pageframes_atomic) -
493 atomic_read(&zcache_pers_pageframes_atomic);
494 if (curr_pageframes > max_pageframes)
495 max_pageframes = curr_pageframes;
496 if (curr_pageframes < min_pageframes)
497 min_pageframes = curr_pageframes;
498 #ifdef ZCACHE_DEBUG
499 if (curr_pageframes > 2L || curr_pageframes < -2L) {
500 /* pr_info here */
501 }
502 #endif
503 }
504
505 /*
506 * zcache implementations for PAM page descriptor ops
507 */
508
509 /* forward reference */
510 static void zcache_compress(struct page *from,
511 void **out_va, unsigned *out_len);
512
513 static struct page *zcache_evict_eph_pageframe(void);
514
515 static void *zcache_pampd_eph_create(char *data, size_t size, bool raw,
516 struct tmem_handle *th)
517 {
518 void *pampd = NULL, *cdata = data;
519 unsigned clen = size;
520 struct page *page = (struct page *)(data), *newpage;
521
522 if (!raw) {
523 zcache_compress(page, &cdata, &clen);
524 if (clen > zbud_max_buddy_size()) {
525 zcache_compress_poor++;
526 goto out;
527 }
528 } else {
529 BUG_ON(clen > zbud_max_buddy_size());
530 }
531
532 /* look for space via an existing match first */
533 pampd = (void *)zbud_match_prep(th, true, cdata, clen);
534 if (pampd != NULL)
535 goto got_pampd;
536
537 /* no match, now we need to find (or free up) a full page */
538 newpage = zcache_alloc_page();
539 if (newpage != NULL)
540 goto create_in_new_page;
541
542 zcache_failed_getfreepages++;
543 /* can't allocate a page, evict an ephemeral page via LRU */
544 newpage = zcache_evict_eph_pageframe();
545 if (newpage == NULL) {
546 zcache_eph_ate_tail_failed++;
547 goto out;
548 }
549 zcache_eph_ate_tail++;
550
551 create_in_new_page:
552 pampd = (void *)zbud_create_prep(th, true, cdata, clen, newpage);
553 BUG_ON(pampd == NULL);
554 zcache_eph_pageframes =
555 atomic_inc_return(&zcache_eph_pageframes_atomic);
556 if (zcache_eph_pageframes > zcache_eph_pageframes_max)
557 zcache_eph_pageframes_max = zcache_eph_pageframes;
558
559 got_pampd:
560 zcache_eph_zbytes =
561 atomic_long_add_return(clen, &zcache_eph_zbytes_atomic);
562 if (zcache_eph_zbytes > zcache_eph_zbytes_max)
563 zcache_eph_zbytes_max = zcache_eph_zbytes;
564 zcache_eph_zpages = atomic_inc_return(&zcache_eph_zpages_atomic);
565 if (zcache_eph_zpages > zcache_eph_zpages_max)
566 zcache_eph_zpages_max = zcache_eph_zpages;
567 if (ramster_enabled && raw)
568 ramster_count_foreign_pages(true, 1);
569 out:
570 return pampd;
571 }
572
573 static void *zcache_pampd_pers_create(char *data, size_t size, bool raw,
574 struct tmem_handle *th)
575 {
576 void *pampd = NULL, *cdata = data;
577 unsigned clen = size;
578 struct page *page = (struct page *)(data), *newpage;
579 unsigned long zbud_mean_zsize;
580 unsigned long curr_pers_zpages, total_zsize;
581
582 if (data == NULL) {
583 BUG_ON(!ramster_enabled);
584 goto create_pampd;
585 }
586 curr_pers_zpages = zcache_pers_zpages;
587 /* FIXME CONFIG_RAMSTER... subtract atomic remote_pers_pages here? */
588 if (!raw)
589 zcache_compress(page, &cdata, &clen);
590 /* reject if compression is too poor */
591 if (clen > zbud_max_zsize) {
592 zcache_compress_poor++;
593 goto out;
594 }
595 /* reject if mean compression is too poor */
596 if ((clen > zbud_max_mean_zsize) && (curr_pers_zpages > 0)) {
597 total_zsize = zcache_pers_zbytes;
598 if ((long)total_zsize < 0)
599 total_zsize = 0;
600 zbud_mean_zsize = div_u64(total_zsize,
601 curr_pers_zpages);
602 if (zbud_mean_zsize > zbud_max_mean_zsize) {
603 zcache_mean_compress_poor++;
604 goto out;
605 }
606 }
607
608 create_pampd:
609 /* look for space via an existing match first */
610 pampd = (void *)zbud_match_prep(th, false, cdata, clen);
611 if (pampd != NULL)
612 goto got_pampd;
613
614 /* no match, now we need to find (or free up) a full page */
615 newpage = zcache_alloc_page();
616 if (newpage != NULL)
617 goto create_in_new_page;
618 /*
619 * FIXME do the following only if eph is oversized?
620 * if (zcache_eph_pageframes >
621 * (global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE) +
622 * global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE)))
623 */
624 zcache_failed_getfreepages++;
625 /* can't allocate a page, evict an ephemeral page via LRU */
626 newpage = zcache_evict_eph_pageframe();
627 if (newpage == NULL) {
628 zcache_pers_ate_eph_failed++;
629 goto out;
630 }
631 zcache_pers_ate_eph++;
632
633 create_in_new_page:
634 pampd = (void *)zbud_create_prep(th, false, cdata, clen, newpage);
635 BUG_ON(pampd == NULL);
636 zcache_pers_pageframes =
637 atomic_inc_return(&zcache_pers_pageframes_atomic);
638 if (zcache_pers_pageframes > zcache_pers_pageframes_max)
639 zcache_pers_pageframes_max = zcache_pers_pageframes;
640
641 got_pampd:
642 zcache_pers_zpages = atomic_inc_return(&zcache_pers_zpages_atomic);
643 if (zcache_pers_zpages > zcache_pers_zpages_max)
644 zcache_pers_zpages_max = zcache_pers_zpages;
645 zcache_pers_zbytes =
646 atomic_long_add_return(clen, &zcache_pers_zbytes_atomic);
647 if (zcache_pers_zbytes > zcache_pers_zbytes_max)
648 zcache_pers_zbytes_max = zcache_pers_zbytes;
649 if (ramster_enabled && raw)
650 ramster_count_foreign_pages(false, 1);
651 out:
652 return pampd;
653 }
654
655 /*
656 * This is called directly from zcache_put_page to pre-allocate space
657 * to store a zpage.
658 */
659 void *zcache_pampd_create(char *data, unsigned int size, bool raw,
660 int eph, struct tmem_handle *th)
661 {
662 void *pampd = NULL;
663 struct zcache_preload *kp;
664 struct tmem_objnode *objnode;
665 struct tmem_obj *obj;
666 int i;
667
668 BUG_ON(!irqs_disabled());
669 /* pre-allocate per-cpu metadata */
670 BUG_ON(zcache_objnode_cache == NULL);
671 BUG_ON(zcache_obj_cache == NULL);
672 kp = &__get_cpu_var(zcache_preloads);
673 for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
674 objnode = kp->objnodes[i];
675 if (objnode == NULL) {
676 objnode = kmem_cache_alloc(zcache_objnode_cache,
677 ZCACHE_GFP_MASK);
678 if (unlikely(objnode == NULL)) {
679 zcache_failed_alloc++;
680 goto out;
681 }
682 kp->objnodes[i] = objnode;
683 }
684 }
685 if (kp->obj == NULL) {
686 obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
687 kp->obj = obj;
688 }
689 if (unlikely(kp->obj == NULL)) {
690 zcache_failed_alloc++;
691 goto out;
692 }
693 /*
694  * ok, we have all the metadata pre-allocated; now handle the data.
695  * Since how we allocate the data depends on whether it is ephemeral
696  * or persistent, we split the call here into two sub-functions
697 */
698 if (eph)
699 pampd = zcache_pampd_eph_create(data, size, raw, th);
700 else
701 pampd = zcache_pampd_pers_create(data, size, raw, th);
702 out:
703 return pampd;
704 }
705
706 /*
707 * This is a pamops called via tmem_put and is necessary to "finish"
708 * a pampd creation.
709 */
710 void zcache_pampd_create_finish(void *pampd, bool eph)
711 {
712 zbud_create_finish((struct zbudref *)pampd, eph);
713 }
714
715 /*
716 * This is passed as a function parameter to zbud_decompress so that
717 * zbud need not be familiar with the details of crypto. It assumes that
718  * the ranges from_va..from_va+size-1 and to_va..to_va+PAGE_SIZE-1 are
719 * kmapped. It must be successful, else there is a logic bug somewhere.
720 */
721 static void zcache_decompress(char *from_va, unsigned int size, char *to_va)
722 {
723 int ret;
724 unsigned int outlen = PAGE_SIZE;
725
726 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size,
727 to_va, &outlen);
728 BUG_ON(ret);
729 BUG_ON(outlen != PAGE_SIZE);
730 }
731
732 /*
733 * Decompress from the kernel va to a pageframe
734 */
735 void zcache_decompress_to_page(char *from_va, unsigned int size,
736 struct page *to_page)
737 {
738 char *to_va = kmap_atomic(to_page);
739 zcache_decompress(from_va, size, to_va);
740 kunmap_atomic(to_va);
741 }
742
743 /*
744 * fill the pageframe corresponding to the struct page with the data
745 * from the passed pampd
746 */
747 static int zcache_pampd_get_data(char *data, size_t *sizep, bool raw,
748 void *pampd, struct tmem_pool *pool,
749 struct tmem_oid *oid, uint32_t index)
750 {
751 int ret;
752 bool eph = !is_persistent(pool);
753
754 BUG_ON(preemptible());
755 BUG_ON(eph); /* fix later if shared pools get implemented */
756 BUG_ON(pampd_is_remote(pampd));
757 if (raw)
758 ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
759 sizep, eph);
760 else {
761 ret = zbud_decompress((struct page *)(data),
762 (struct zbudref *)pampd, false,
763 zcache_decompress);
764 *sizep = PAGE_SIZE;
765 }
766 return ret;
767 }
768
769 /*
770 * fill the pageframe corresponding to the struct page with the data
771 * from the passed pampd
772 */
773 static int zcache_pampd_get_data_and_free(char *data, size_t *sizep, bool raw,
774 void *pampd, struct tmem_pool *pool,
775 struct tmem_oid *oid, uint32_t index)
776 {
777 int ret;
778 bool eph = !is_persistent(pool);
779 struct page *page = NULL;
780 unsigned int zsize, zpages;
781
782 BUG_ON(preemptible());
783 BUG_ON(pampd_is_remote(pampd));
784 if (raw)
785 ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
786 sizep, eph);
787 else {
788 ret = zbud_decompress((struct page *)(data),
789 (struct zbudref *)pampd, eph,
790 zcache_decompress);
791 *sizep = PAGE_SIZE;
792 }
793 page = zbud_free_and_delist((struct zbudref *)pampd, eph,
794 &zsize, &zpages);
795 if (eph) {
796 if (page)
797 zcache_eph_pageframes =
798 atomic_dec_return(&zcache_eph_pageframes_atomic);
799 zcache_eph_zpages =
800 atomic_sub_return(zpages, &zcache_eph_zpages_atomic);
801 zcache_eph_zbytes =
802 atomic_long_sub_return(zsize, &zcache_eph_zbytes_atomic);
803 } else {
804 if (page)
805 zcache_pers_pageframes =
806 atomic_dec_return(&zcache_pers_pageframes_atomic);
807 zcache_pers_zpages =
808 atomic_sub_return(zpages, &zcache_pers_zpages_atomic);
809 zcache_pers_zbytes =
810 atomic_long_sub_return(zsize, &zcache_pers_zbytes_atomic);
811 }
812 if (!is_local_client(pool->client))
813 ramster_count_foreign_pages(eph, -1);
814 if (page)
815 zcache_free_page(page);
816 return ret;
817 }
818
819 /*
820 * free the pampd and remove it from any zcache lists
821 * pampd must no longer be pointed to from any tmem data structures!
822 */
823 static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
824 struct tmem_oid *oid, uint32_t index, bool acct)
825 {
826 struct page *page = NULL;
827 unsigned int zsize, zpages;
828
829 BUG_ON(preemptible());
830 if (pampd_is_remote(pampd)) {
831 BUG_ON(!ramster_enabled);
832 pampd = ramster_pampd_free(pampd, pool, oid, index, acct);
833 if (pampd == NULL)
834 return;
835 }
836 if (is_ephemeral(pool)) {
837 page = zbud_free_and_delist((struct zbudref *)pampd,
838 true, &zsize, &zpages);
839 if (page)
840 zcache_eph_pageframes =
841 atomic_dec_return(&zcache_eph_pageframes_atomic);
842 zcache_eph_zpages =
843 atomic_sub_return(zpages, &zcache_eph_zpages_atomic);
844 zcache_eph_zbytes =
845 atomic_long_sub_return(zsize, &zcache_eph_zbytes_atomic);
846 /* FIXME CONFIG_RAMSTER... check acct parameter? */
847 } else {
848 page = zbud_free_and_delist((struct zbudref *)pampd,
849 false, &zsize, &zpages);
850 if (page)
851 zcache_pers_pageframes =
852 atomic_dec_return(&zcache_pers_pageframes_atomic);
853 zcache_pers_zpages =
854 atomic_sub_return(zpages, &zcache_pers_zpages_atomic);
855 zcache_pers_zbytes =
856 atomic_long_sub_return(zsize, &zcache_pers_zbytes_atomic);
857 }
858 if (!is_local_client(pool->client))
859 ramster_count_foreign_pages(is_ephemeral(pool), -1);
860 if (page)
861 zcache_free_page(page);
862 }
863
864 static struct tmem_pamops zcache_pamops = {
865 .create_finish = zcache_pampd_create_finish,
866 .get_data = zcache_pampd_get_data,
867 .get_data_and_free = zcache_pampd_get_data_and_free,
868 .free = zcache_pampd_free,
869 };
870
871 /*
872 * zcache compression/decompression and related per-cpu stuff
873 */
874
875 static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
876 #define ZCACHE_DSTMEM_ORDER 1
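/*
 * Clarifying note (not in the original source): the per-cpu dstmem buffer
 * is order-1, i.e. two pageframes (8KB assuming 4KB pages), so even a
 * "compressed" result that expands beyond PAGE_SIZE still fits;
 * zcache_compress() passes PAGE_SIZE << ZCACHE_DSTMEM_ORDER as the
 * output-length limit.
 */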
877
878 static void zcache_compress(struct page *from, void **out_va, unsigned *out_len)
879 {
880 int ret;
881 unsigned char *dmem = __get_cpu_var(zcache_dstmem);
882 char *from_va;
883
884 BUG_ON(!irqs_disabled());
885 /* no buffer or no compressor so can't compress */
886 BUG_ON(dmem == NULL);
887 *out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER;
888 from_va = kmap_atomic(from);
889 mb();
890 ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem,
891 out_len);
892 BUG_ON(ret);
893 *out_va = dmem;
894 kunmap_atomic(from_va);
895 }
896
897 static int zcache_comp_cpu_up(int cpu)
898 {
899 struct crypto_comp *tfm;
900
901 tfm = crypto_alloc_comp(zcache_comp_name, 0, 0);
902 if (IS_ERR(tfm))
903 return NOTIFY_BAD;
904 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
905 return NOTIFY_OK;
906 }
907
908 static void zcache_comp_cpu_down(int cpu)
909 {
910 struct crypto_comp *tfm;
911
912 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
913 crypto_free_comp(tfm);
914 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
915 }
916
917 static int zcache_cpu_notifier(struct notifier_block *nb,
918 unsigned long action, void *pcpu)
919 {
920 int ret, i, cpu = (long)pcpu;
921 struct zcache_preload *kp;
922
923 switch (action) {
924 case CPU_UP_PREPARE:
925 ret = zcache_comp_cpu_up(cpu);
926 if (ret != NOTIFY_OK) {
927 pr_err("%s: can't allocate compressor xform\n",
928 namestr);
929 return ret;
930 }
931 per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
932 GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER);
933 if (ramster_enabled)
934 ramster_cpu_up(cpu);
935 break;
936 case CPU_DEAD:
937 case CPU_UP_CANCELED:
938 zcache_comp_cpu_down(cpu);
939 free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
940 ZCACHE_DSTMEM_ORDER);
941 per_cpu(zcache_dstmem, cpu) = NULL;
942 kp = &per_cpu(zcache_preloads, cpu);
943 for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
944 if (kp->objnodes[i])
945 kmem_cache_free(zcache_objnode_cache,
946 kp->objnodes[i]);
947 }
948 if (kp->obj) {
949 kmem_cache_free(zcache_obj_cache, kp->obj);
950 kp->obj = NULL;
951 }
952 if (ramster_enabled)
953 ramster_cpu_down(cpu);
954 break;
955 default:
956 break;
957 }
958 return NOTIFY_OK;
959 }
960
961 static struct notifier_block zcache_cpu_notifier_block = {
962 .notifier_call = zcache_cpu_notifier
963 };
964
965 /*
966 * The following code interacts with the zbud eviction and zbud
967 * zombify code to access LRU pages
968 */
969
970 static struct page *zcache_evict_eph_pageframe(void)
971 {
972 struct page *page;
973 unsigned int zsize = 0, zpages = 0;
974
975 page = zbud_evict_pageframe_lru(&zsize, &zpages);
976 if (page == NULL)
977 goto out;
978 zcache_eph_zbytes = atomic_long_sub_return(zsize,
979 &zcache_eph_zbytes_atomic);
980 zcache_eph_zpages = atomic_sub_return(zpages,
981 &zcache_eph_zpages_atomic);
982 zcache_evicted_eph_zpages += zpages;
983 zcache_eph_pageframes =
984 atomic_dec_return(&zcache_eph_pageframes_atomic);
985 zcache_evicted_eph_pageframes++;
986 out:
987 return page;
988 }
989
990 #ifdef CONFIG_ZCACHE_WRITEBACK
991
992 static atomic_t zcache_outstanding_writeback_pages_atomic = ATOMIC_INIT(0);
993
994 static void unswiz(struct tmem_oid oid, u32 index,
995 unsigned *type, pgoff_t *offset);
996
997 /*
998 * Choose an LRU persistent pageframe and attempt to write it back to
999 * the backing swap disk by calling frontswap_writeback on both zpages.
1000 *
1001 * This is work-in-progress.
1002 */
1003
1004 static void zcache_end_swap_write(struct bio *bio, int err)
1005 {
1006 end_swap_bio_write(bio, err);
1007 zcache_outstanding_writeback_pages =
1008 atomic_dec_return(&zcache_outstanding_writeback_pages_atomic);
1009 zcache_writtenback_pages++;
1010 }
1011
1012 /*
1013 * zcache_get_swap_cache_page
1014 *
1015  * This is an adaptation of read_swap_cache_async()
1016 *
1017  * The caller passes in an empty new_page; on success:
1018 * Returns 0 if page was already in the swap cache, page is not locked
1019 * Returns 1 if the new page needs to be populated, page is locked
1020 */
1021 static int zcache_get_swap_cache_page(int type, pgoff_t offset,
1022 struct page *new_page)
1023 {
1024 struct page *found_page;
1025 swp_entry_t entry = swp_entry(type, offset);
1026 int err;
1027
1028 BUG_ON(new_page == NULL);
1029 do {
1030 /*
1031 * First check the swap cache. Since this is normally
1032 * called after lookup_swap_cache() failed, re-calling
1033 * that would confuse statistics.
1034 */
1035 found_page = find_get_page(&swapper_space, entry.val);
1036 if (found_page)
1037 return 0;
1038
1039 /*
1040 * call radix_tree_preload() while we can wait.
1041 */
1042 err = radix_tree_preload(GFP_KERNEL);
1043 if (err)
1044 break;
1045
1046 /*
1047 * Swap entry may have been freed since our caller observed it.
1048 */
1049 err = swapcache_prepare(entry);
1050 if (err == -EEXIST) { /* seems racy */
1051 radix_tree_preload_end();
1052 continue;
1053 }
1054 if (err) { /* swp entry is obsolete ? */
1055 radix_tree_preload_end();
1056 break;
1057 }
1058
1059 /* May fail (-ENOMEM) if radix-tree node allocation failed. */
1060 __set_page_locked(new_page);
1061 SetPageSwapBacked(new_page);
1062 err = __add_to_swap_cache(new_page, entry);
1063 if (likely(!err)) {
1064 radix_tree_preload_end();
1065 lru_cache_add_anon(new_page);
1066 return 1;
1067 }
1068 radix_tree_preload_end();
1069 ClearPageSwapBacked(new_page);
1070 __clear_page_locked(new_page);
1071 /*
1072 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
1073 * clear SWAP_HAS_CACHE flag.
1074 */
1075 swapcache_free(entry, NULL);
1076 /* FIXME: is it possible to get here without err==-ENOMEM?
1077 * If not, we can dispense with the do loop, use goto retry */
1078 } while (err != -ENOMEM);
1079
1080 return -ENOMEM;
1081 }
1082
1083 /*
1084 * Given a frontswap zpage in zcache (identified by type/offset) and
1085 * an empty page, put the page into the swap cache, use frontswap
1086 * to get the page from zcache into the empty page, then give it
1087 * to the swap subsystem to send to disk (carefully avoiding the
1088 * possibility that frontswap might snatch it back).
1089 * Returns < 0 if error, 0 if successful, and 1 if successful but
1090  * the newpage passed in is not needed and should be freed.
1091 */
1092 static int zcache_frontswap_writeback_zpage(int type, pgoff_t offset,
1093 struct page *newpage)
1094 {
1095 struct page *page = newpage;
1096 int ret;
1097 struct writeback_control wbc = {
1098 .sync_mode = WB_SYNC_NONE,
1099 };
1100
1101 ret = zcache_get_swap_cache_page(type, offset, page);
1102 if (ret < 0)
1103 return ret;
1104 else if (ret == 0) {
1105 /* more uptodate page is already in swapcache */
1106 __frontswap_invalidate_page(type, offset);
1107 return 1;
1108 }
1109
1110 BUG_ON(!frontswap_has_exclusive_gets); /* load must also invalidate */
1111 /* FIXME: how is it possible to get here when page is unlocked? */
1112 __frontswap_load(page);
1113 SetPageUptodate(page); /* above does SetPageDirty, is that enough? */
1114
1115 /* start writeback */
1116 SetPageReclaim(page);
1117 /*
1118 * Return value is ignored here because it doesn't change anything
1119 * for us. Page is returned unlocked.
1120 */
1121 (void)__swap_writepage(page, &wbc, zcache_end_swap_write);
1122 page_cache_release(page);
1123 zcache_outstanding_writeback_pages =
1124 atomic_inc_return(&zcache_outstanding_writeback_pages_atomic);
1125
1126 return 0;
1127 }
1128
1129 /*
1130 * The following is still a magic number... we want to allow forward progress
1131  * for writeback because it frees up needed RAM when under pressure, but
1132 * we don't want to allow writeback to absorb and queue too many GFP_KERNEL
1133 * pages if the swap device is very slow.
1134 */
1135 #define ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES 6400
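/*
 * Rough scale, assuming 4KB pages: 6400 outstanding pages is about 25MB
 * of writeback data queued against the swap device at any one time.
 */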
1136
1137 /*
1138 * Try to allocate two free pages, first using a non-aggressive alloc,
1139 * then by evicting zcache ephemeral (clean pagecache) pages, and last
1140 * by aggressive GFP_KERNEL alloc. We allow zbud to choose a pageframe
1141 * consisting of 1-2 zbuds/zpages, then call the writeback_zpage helper
1142 * function above for each.
1143 */
1144 static int zcache_frontswap_writeback(void)
1145 {
1146 struct tmem_handle th[2];
1147 int ret = 0;
1148 int nzbuds, writeback_ret;
1149 unsigned type;
1150 struct page *znewpage1 = NULL, *znewpage2 = NULL;
1151 struct page *evictpage1 = NULL, *evictpage2 = NULL;
1152 struct page *newpage1 = NULL, *newpage2 = NULL;
1153 struct page *page1 = NULL, *page2 = NULL;
1154 pgoff_t offset;
1155
1156 znewpage1 = alloc_page(ZCACHE_GFP_MASK);
1157 znewpage2 = alloc_page(ZCACHE_GFP_MASK);
1158 if (znewpage1 == NULL)
1159 evictpage1 = zcache_evict_eph_pageframe();
1160 if (znewpage2 == NULL)
1161 evictpage2 = zcache_evict_eph_pageframe();
1162
1163 if ((evictpage1 == NULL || evictpage2 == NULL) &&
1164 atomic_read(&zcache_outstanding_writeback_pages_atomic) >
1165 ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES) {
1166 goto free_and_out;
1167 }
1168 if (znewpage1 == NULL && evictpage1 == NULL)
1169 newpage1 = alloc_page(GFP_KERNEL);
1170 if (znewpage2 == NULL && evictpage2 == NULL)
1171 newpage2 = alloc_page(GFP_KERNEL);
1172 if (newpage1 == NULL || newpage2 == NULL)
1173 goto free_and_out;
1174
1175 /* ok, we have two pageframes pre-allocated, get a pair of zbuds */
1176 nzbuds = zbud_make_zombie_lru(&th[0], NULL, NULL, false);
1177 if (nzbuds == 0) {
1178 ret = -ENOENT;
1179 goto free_and_out;
1180 }
1181
1182 /* process the first zbud */
1183 unswiz(th[0].oid, th[0].index, &type, &offset);
1184 page1 = (znewpage1 != NULL) ? znewpage1 :
1185 ((newpage1 != NULL) ? newpage1 : evictpage1);
1186 writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page1);
1187 if (writeback_ret < 0) {
1188 ret = -ENOMEM;
1189 goto free_and_out;
1190 }
1191 if (evictpage1 != NULL)
1192 zcache_pageframes_freed =
1193 atomic_inc_return(&zcache_pageframes_freed_atomic);
1194 if (writeback_ret == 0) {
1195 /* zcache_get_swap_cache_page will free, don't double free */
1196 znewpage1 = NULL;
1197 newpage1 = NULL;
1198 evictpage1 = NULL;
1199 }
1200 if (nzbuds < 2)
1201 goto free_and_out;
1202
1203 /* if there is a second zbud, process it */
1204 unswiz(th[1].oid, th[1].index, &type, &offset);
1205 page2 = (znewpage2 != NULL) ? znewpage2 :
1206 ((newpage2 != NULL) ? newpage2 : evictpage2);
1207 writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page2);
1208 if (writeback_ret < 0) {
1209 ret = -ENOMEM;
1210 goto free_and_out;
1211 }
1212 if (evictpage2 != NULL)
1213 zcache_pageframes_freed =
1214 atomic_inc_return(&zcache_pageframes_freed_atomic);
1215 if (writeback_ret == 0) {
1216 znewpage2 = NULL;
1217 newpage2 = NULL;
1218 evictpage2 = NULL;
1219 }
1220
1221 free_and_out:
1222 if (znewpage1 != NULL)
1223 page_cache_release(znewpage1);
1224 if (znewpage2 != NULL)
1225 page_cache_release(znewpage2);
1226 if (newpage1 != NULL)
1227 page_cache_release(newpage1);
1228 if (newpage2 != NULL)
1229 page_cache_release(newpage2);
1230 if (evictpage1 != NULL)
1231 zcache_free_page(evictpage1);
1232 if (evictpage2 != NULL)
1233 zcache_free_page(evictpage2);
1234 return ret;
1235 }
1236 #endif /* CONFIG_ZCACHE_WRITEBACK */
1237
1238 /*
1239 * When zcache is disabled ("frozen"), pools can be created and destroyed,
1240 * but all puts (and thus all other operations that require memory allocation)
1241  * must fail. If zcache is unfrozen, accepts puts, and is then frozen again,
1242 * data consistency requires all puts while frozen to be converted into
1243 * flushes.
1244 */
1245 static bool zcache_freeze;
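/*
 * Added pointer for clarity: the freeze policy is enforced in
 * zcache_put_page() below -- while zcache_freeze is true, an attempted put
 * is converted into a flush of any existing copy of the page.
 */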
1246
1247 /*
1248 * This zcache shrinker interface reduces the number of ephemeral pageframes
1249 * used by zcache to approximately the same as the total number of LRU_FILE
1250 * pageframes in use, and now also reduces the number of persistent pageframes
1251 * used by zcache to approximately the same as the total number of LRU_ANON
1252 * pageframes in use. FIXME POLICY: Probably the writeback should only occur
1253 * if the eviction doesn't free enough pages.
1254 */
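/*
 * Worked example (illustrative only): if zcache currently holds 10000
 * ephemeral pageframes while the file LRUs hold 8000 pages, nr_evict below
 * becomes 2000; likewise, persistent pageframes in excess of the anon LRU
 * total are written back (when CONFIG_ZCACHE_WRITEBACK is enabled).
 */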
1255 static int shrink_zcache_memory(struct shrinker *shrink,
1256 struct shrink_control *sc)
1257 {
1258 static bool in_progress;
1259 int ret = -1;
1260 int nr = sc->nr_to_scan;
1261 int nr_evict = 0;
1262 int nr_writeback = 0;
1263 struct page *page;
1264 int file_pageframes_inuse, anon_pageframes_inuse;
1265
1266 if (nr <= 0)
1267 goto skip_evict;
1268
1269 /* don't allow more than one eviction thread at a time */
1270 if (in_progress)
1271 goto skip_evict;
1272
1273 in_progress = true;
1274
1275 /* we are going to ignore nr, and target a different value */
1276 zcache_last_active_file_pageframes =
1277 global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
1278 zcache_last_inactive_file_pageframes =
1279 global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
1280 file_pageframes_inuse = zcache_last_active_file_pageframes +
1281 zcache_last_inactive_file_pageframes;
1282 if (zcache_eph_pageframes > file_pageframes_inuse)
1283 nr_evict = zcache_eph_pageframes - file_pageframes_inuse;
1284 else
1285 nr_evict = 0;
1286 while (nr_evict-- > 0) {
1287 page = zcache_evict_eph_pageframe();
1288 if (page == NULL)
1289 break;
1290 zcache_free_page(page);
1291 }
1292
1293 zcache_last_active_anon_pageframes =
1294 global_page_state(NR_LRU_BASE + LRU_ACTIVE_ANON);
1295 zcache_last_inactive_anon_pageframes =
1296 global_page_state(NR_LRU_BASE + LRU_INACTIVE_ANON);
1297 anon_pageframes_inuse = zcache_last_active_anon_pageframes +
1298 zcache_last_inactive_anon_pageframes;
1299 if (zcache_pers_pageframes > anon_pageframes_inuse)
1300 nr_writeback = zcache_pers_pageframes - anon_pageframes_inuse;
1301 else
1302 nr_writeback = 0;
1303 while (nr_writeback-- > 0) {
1304 #ifdef CONFIG_ZCACHE_WRITEBACK
1305 int writeback_ret;
1306 writeback_ret = zcache_frontswap_writeback();
1307 if (writeback_ret == -ENOMEM)
1308 #endif
1309 break;
1310 }
1311 in_progress = false;
1312
1313 skip_evict:
1314 	/* resample: the LRU counts have changed, but maybe not all the way yet */
1315 zcache_last_active_file_pageframes =
1316 global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
1317 zcache_last_inactive_file_pageframes =
1318 global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
1319 ret = zcache_eph_pageframes - zcache_last_active_file_pageframes +
1320 zcache_last_inactive_file_pageframes;
1321 if (ret < 0)
1322 ret = 0;
1323 return ret;
1324 }
1325
1326 static struct shrinker zcache_shrinker = {
1327 .shrink = shrink_zcache_memory,
1328 .seeks = DEFAULT_SEEKS,
1329 };
1330
1331 /*
1332 * zcache shims between cleancache/frontswap ops and tmem
1333 */
1334
1335 /* FIXME rename these core routines to zcache_tmemput etc? */
1336 int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
1337 uint32_t index, void *page,
1338 unsigned int size, bool raw, int ephemeral)
1339 {
1340 struct tmem_pool *pool;
1341 struct tmem_handle th;
1342 int ret = -1;
1343 void *pampd = NULL;
1344
1345 BUG_ON(!irqs_disabled());
1346 pool = zcache_get_pool_by_id(cli_id, pool_id);
1347 if (unlikely(pool == NULL))
1348 goto out;
1349 if (!zcache_freeze) {
1350 ret = 0;
1351 th.client_id = cli_id;
1352 th.pool_id = pool_id;
1353 th.oid = *oidp;
1354 th.index = index;
1355 pampd = zcache_pampd_create((char *)page, size, raw,
1356 ephemeral, &th);
1357 if (pampd == NULL) {
1358 ret = -ENOMEM;
1359 if (ephemeral)
1360 zcache_failed_eph_puts++;
1361 else
1362 zcache_failed_pers_puts++;
1363 } else {
1364 if (ramster_enabled)
1365 ramster_do_preload_flnode(pool);
1366 ret = tmem_put(pool, oidp, index, 0, pampd);
1367 if (ret < 0)
1368 BUG();
1369 }
1370 zcache_put_pool(pool);
1371 } else {
1372 zcache_put_to_flush++;
1373 if (ramster_enabled)
1374 ramster_do_preload_flnode(pool);
1375 if (atomic_read(&pool->obj_count) > 0)
1376 /* the put fails whether the flush succeeds or not */
1377 (void)tmem_flush_page(pool, oidp, index);
1378 zcache_put_pool(pool);
1379 }
1380 out:
1381 return ret;
1382 }
1383
1384 int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
1385 uint32_t index, void *page,
1386 size_t *sizep, bool raw, int get_and_free)
1387 {
1388 struct tmem_pool *pool;
1389 int ret = -1;
1390 bool eph;
1391
1392 if (!raw) {
1393 BUG_ON(irqs_disabled());
1394 BUG_ON(in_softirq());
1395 }
1396 pool = zcache_get_pool_by_id(cli_id, pool_id);
1397 eph = is_ephemeral(pool);
1398 if (likely(pool != NULL)) {
1399 if (atomic_read(&pool->obj_count) > 0)
1400 ret = tmem_get(pool, oidp, index, (char *)(page),
1401 sizep, raw, get_and_free);
1402 zcache_put_pool(pool);
1403 }
1404 WARN_ONCE((!is_ephemeral(pool) && (ret != 0)),
1405 "zcache_get fails on persistent pool, "
1406 "bad things are very likely to happen soon\n");
1407 #ifdef RAMSTER_TESTING
1408 if (ret != 0 && ret != -1 && !(ret == -EINVAL && is_ephemeral(pool)))
1409 pr_err("TESTING zcache_get tmem_get returns ret=%d\n", ret);
1410 #endif
1411 return ret;
1412 }
1413
1414 int zcache_flush_page(int cli_id, int pool_id,
1415 struct tmem_oid *oidp, uint32_t index)
1416 {
1417 struct tmem_pool *pool;
1418 int ret = -1;
1419 unsigned long flags;
1420
1421 local_irq_save(flags);
1422 zcache_flush_total++;
1423 pool = zcache_get_pool_by_id(cli_id, pool_id);
1424 if (ramster_enabled)
1425 ramster_do_preload_flnode(pool);
1426 if (likely(pool != NULL)) {
1427 if (atomic_read(&pool->obj_count) > 0)
1428 ret = tmem_flush_page(pool, oidp, index);
1429 zcache_put_pool(pool);
1430 }
1431 if (ret >= 0)
1432 zcache_flush_found++;
1433 local_irq_restore(flags);
1434 return ret;
1435 }
1436
1437 int zcache_flush_object(int cli_id, int pool_id,
1438 struct tmem_oid *oidp)
1439 {
1440 struct tmem_pool *pool;
1441 int ret = -1;
1442 unsigned long flags;
1443
1444 local_irq_save(flags);
1445 zcache_flobj_total++;
1446 pool = zcache_get_pool_by_id(cli_id, pool_id);
1447 if (ramster_enabled)
1448 ramster_do_preload_flnode(pool);
1449 if (likely(pool != NULL)) {
1450 if (atomic_read(&pool->obj_count) > 0)
1451 ret = tmem_flush_object(pool, oidp);
1452 zcache_put_pool(pool);
1453 }
1454 if (ret >= 0)
1455 zcache_flobj_found++;
1456 local_irq_restore(flags);
1457 return ret;
1458 }
1459
1460 static int zcache_client_destroy_pool(int cli_id, int pool_id)
1461 {
1462 struct tmem_pool *pool = NULL;
1463 struct zcache_client *cli = NULL;
1464 int ret = -1;
1465
1466 if (pool_id < 0)
1467 goto out;
1468 if (cli_id == LOCAL_CLIENT)
1469 cli = &zcache_host;
1470 else if ((unsigned int)cli_id < MAX_CLIENTS)
1471 cli = &zcache_clients[cli_id];
1472 if (cli == NULL)
1473 goto out;
1474 atomic_inc(&cli->refcount);
1475 pool = cli->tmem_pools[pool_id];
1476 if (pool == NULL)
1477 goto out;
1478 cli->tmem_pools[pool_id] = NULL;
1479 /* wait for pool activity on other cpus to quiesce */
1480 while (atomic_read(&pool->refcount) != 0)
1481 ;
1482 atomic_dec(&cli->refcount);
1483 local_bh_disable();
1484 ret = tmem_destroy_pool(pool);
1485 local_bh_enable();
1486 kfree(pool);
1487 if (cli_id == LOCAL_CLIENT)
1488 pr_info("%s: destroyed local pool id=%d\n", namestr, pool_id);
1489 else
1490 pr_info("%s: destroyed pool id=%d, client=%d\n",
1491 namestr, pool_id, cli_id);
1492 out:
1493 return ret;
1494 }
1495
1496 int zcache_new_pool(uint16_t cli_id, uint32_t flags)
1497 {
1498 int poolid = -1;
1499 struct tmem_pool *pool;
1500 struct zcache_client *cli = NULL;
1501
1502 if (cli_id == LOCAL_CLIENT)
1503 cli = &zcache_host;
1504 else if ((unsigned int)cli_id < MAX_CLIENTS)
1505 cli = &zcache_clients[cli_id];
1506 if (cli == NULL)
1507 goto out;
1508 atomic_inc(&cli->refcount);
1509 pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC);
1510 if (pool == NULL)
1511 goto out;
1512
1513 for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
1514 if (cli->tmem_pools[poolid] == NULL)
1515 break;
1516 if (poolid >= MAX_POOLS_PER_CLIENT) {
1517 pr_info("%s: pool creation failed: max exceeded\n", namestr);
1518 kfree(pool);
1519 poolid = -1;
1520 goto out;
1521 }
1522 atomic_set(&pool->refcount, 0);
1523 pool->client = cli;
1524 pool->pool_id = poolid;
1525 tmem_new_pool(pool, flags);
1526 cli->tmem_pools[poolid] = pool;
1527 if (cli_id == LOCAL_CLIENT)
1528 pr_info("%s: created %s local tmem pool, id=%d\n", namestr,
1529 flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
1530 poolid);
1531 else
1532 pr_info("%s: created %s tmem pool, id=%d, client=%d\n", namestr,
1533 flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
1534 poolid, cli_id);
1535 out:
1536 if (cli != NULL)
1537 atomic_dec(&cli->refcount);
1538 return poolid;
1539 }
1540
1541 static int zcache_local_new_pool(uint32_t flags)
1542 {
1543 return zcache_new_pool(LOCAL_CLIENT, flags);
1544 }
1545
1546 int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph)
1547 {
1548 struct tmem_pool *pool;
1549 struct zcache_client *cli;
1550 uint32_t flags = eph ? 0 : TMEM_POOL_PERSIST;
1551 int ret = -1;
1552
1553 BUG_ON(!ramster_enabled);
1554 if (cli_id == LOCAL_CLIENT)
1555 goto out;
1556 if (pool_id >= MAX_POOLS_PER_CLIENT)
1557 goto out;
1558 if (cli_id >= MAX_CLIENTS)
1559 goto out;
1560
1561 cli = &zcache_clients[cli_id];
1562 if ((eph && disable_cleancache) || (!eph && disable_frontswap)) {
1563 pr_err("zcache_autocreate_pool: pool type disabled\n");
1564 goto out;
1565 }
1566 if (!cli->allocated) {
1567 if (zcache_new_client(cli_id)) {
1568 pr_err("zcache_autocreate_pool: can't create client\n");
1569 goto out;
1570 }
1571 cli = &zcache_clients[cli_id];
1572 }
1573 atomic_inc(&cli->refcount);
1574 pool = cli->tmem_pools[pool_id];
1575 if (pool != NULL) {
1576 if (pool->persistent && eph) {
1577 pr_err("zcache_autocreate_pool: type mismatch\n");
1578 goto out;
1579 }
1580 ret = 0;
1581 goto out;
1582 }
1583 pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
1584 if (pool == NULL)
1585 goto out;
1586
1587 atomic_set(&pool->refcount, 0);
1588 pool->client = cli;
1589 pool->pool_id = pool_id;
1590 tmem_new_pool(pool, flags);
1591 cli->tmem_pools[pool_id] = pool;
1592 pr_info("%s: AUTOcreated %s tmem poolid=%d, for remote client=%d\n",
1593 namestr, flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
1594 pool_id, cli_id);
1595 ret = 0;
1596 out:
1597 if (cli != NULL)
1598 atomic_dec(&cli->refcount);
1599 return ret;
1600 }
1601
1602 /**********
1603 * Two kernel functionalities currently can be layered on top of tmem.
1604 * These are "cleancache" which is used as a second-chance cache for clean
1605 * page cache pages; and "frontswap" which is used for swap pages
1606 * to avoid writes to disk. A generic "shim" is provided here for each
1607 * to translate in-kernel semantics to zcache semantics.
1608 */
1609
1610 static void zcache_cleancache_put_page(int pool_id,
1611 struct cleancache_filekey key,
1612 pgoff_t index, struct page *page)
1613 {
1614 u32 ind = (u32) index;
1615 struct tmem_oid oid = *(struct tmem_oid *)&key;
1616
1617 if (!disable_cleancache_ignore_nonactive && !PageWasActive(page)) {
1618 zcache_eph_nonactive_puts_ignored++;
1619 return;
1620 }
1621 if (likely(ind == index))
1622 (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index,
1623 page, PAGE_SIZE, false, 1);
1624 }
1625
1626 static int zcache_cleancache_get_page(int pool_id,
1627 struct cleancache_filekey key,
1628 pgoff_t index, struct page *page)
1629 {
1630 u32 ind = (u32) index;
1631 struct tmem_oid oid = *(struct tmem_oid *)&key;
1632 size_t size;
1633 int ret = -1;
1634
1635 if (likely(ind == index)) {
1636 ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index,
1637 page, &size, false, 0);
1638 BUG_ON(ret >= 0 && size != PAGE_SIZE);
1639 if (ret == 0)
1640 SetPageWasActive(page);
1641 }
1642 return ret;
1643 }
1644
1645 static void zcache_cleancache_flush_page(int pool_id,
1646 struct cleancache_filekey key,
1647 pgoff_t index)
1648 {
1649 u32 ind = (u32) index;
1650 struct tmem_oid oid = *(struct tmem_oid *)&key;
1651
1652 if (likely(ind == index))
1653 (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
1654 }
1655
1656 static void zcache_cleancache_flush_inode(int pool_id,
1657 struct cleancache_filekey key)
1658 {
1659 struct tmem_oid oid = *(struct tmem_oid *)&key;
1660
1661 (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
1662 }
1663
1664 static void zcache_cleancache_flush_fs(int pool_id)
1665 {
1666 if (pool_id >= 0)
1667 (void)zcache_client_destroy_pool(LOCAL_CLIENT, pool_id);
1668 }
1669
1670 static int zcache_cleancache_init_fs(size_t pagesize)
1671 {
1672 BUG_ON(sizeof(struct cleancache_filekey) !=
1673 sizeof(struct tmem_oid));
1674 BUG_ON(pagesize != PAGE_SIZE);
1675 return zcache_local_new_pool(0);
1676 }
1677
1678 static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
1679 {
1680 /* shared pools are unsupported and map to private */
1681 BUG_ON(sizeof(struct cleancache_filekey) !=
1682 sizeof(struct tmem_oid));
1683 BUG_ON(pagesize != PAGE_SIZE);
1684 return zcache_local_new_pool(0);
1685 }
1686
1687 static struct cleancache_ops zcache_cleancache_ops = {
1688 .put_page = zcache_cleancache_put_page,
1689 .get_page = zcache_cleancache_get_page,
1690 .invalidate_page = zcache_cleancache_flush_page,
1691 .invalidate_inode = zcache_cleancache_flush_inode,
1692 .invalidate_fs = zcache_cleancache_flush_fs,
1693 .init_shared_fs = zcache_cleancache_init_shared_fs,
1694 .init_fs = zcache_cleancache_init_fs
1695 };
1696
1697 struct cleancache_ops zcache_cleancache_register_ops(void)
1698 {
1699 struct cleancache_ops old_ops =
1700 cleancache_register_ops(&zcache_cleancache_ops);
1701
1702 return old_ops;
1703 }
1704
1705 /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
1706 static int zcache_frontswap_poolid __read_mostly = -1;
1707
1708 /*
1709 * Swizzling increases objects per swaptype, increasing tmem concurrency
1710 * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
1711 * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
1712 * frontswap_get_page(), but has side-effects. Hence using 8.
1713 */
1714 #define SWIZ_BITS 8
1715 #define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
1716 #define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
1717 #define iswiz(_ind) (_ind >> SWIZ_BITS)
1718
1719 static inline struct tmem_oid oswiz(unsigned type, u32 ind)
1720 {
1721 struct tmem_oid oid = { .oid = { 0 } };
1722 oid.oid[0] = _oswiz(type, ind);
1723 return oid;
1724 }
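/*
 * Worked example (illustrative, SWIZ_BITS == 8): for type 1 and swap
 * offset 0x12345, oswiz() yields oid.oid[0] == (1 << 8) | 0x45 == 0x145
 * and iswiz() yields tmem index 0x123; unswiz() below recombines them as
 * (0x123 << 8) | 0x45 == 0x12345 and type == 0x145 >> 8 == 1.
 */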
1725
1726 #ifdef CONFIG_ZCACHE_WRITEBACK
1727 static void unswiz(struct tmem_oid oid, u32 index,
1728 unsigned *type, pgoff_t *offset)
1729 {
1730 *type = (unsigned)(oid.oid[0] >> SWIZ_BITS);
1731 *offset = (pgoff_t)((index << SWIZ_BITS) |
1732 (oid.oid[0] & SWIZ_MASK));
1733 }
1734 #endif
1735
1736 static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
1737 struct page *page)
1738 {
1739 u64 ind64 = (u64)offset;
1740 u32 ind = (u32)offset;
1741 struct tmem_oid oid = oswiz(type, ind);
1742 int ret = -1;
1743 unsigned long flags;
1744
1745 BUG_ON(!PageLocked(page));
1746 if (!disable_frontswap_ignore_nonactive && !PageWasActive(page)) {
1747 zcache_pers_nonactive_puts_ignored++;
1748 ret = -ERANGE;
1749 goto out;
1750 }
1751 if (likely(ind64 == ind)) {
1752 local_irq_save(flags);
1753 ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1754 &oid, iswiz(ind),
1755 page, PAGE_SIZE, false, 0);
1756 local_irq_restore(flags);
1757 }
1758 out:
1759 return ret;
1760 }
1761
1762 /* returns 0 if the page was successfully gotten from frontswap, -1 if
1763 * it was not present (should never happen!) */
1764 static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
1765 struct page *page)
1766 {
1767 u64 ind64 = (u64)offset;
1768 u32 ind = (u32)offset;
1769 struct tmem_oid oid = oswiz(type, ind);
1770 size_t size;
1771 int ret = -1, get_and_free;
1772
1773 if (frontswap_has_exclusive_gets)
1774 get_and_free = 1;
1775 else
1776 get_and_free = -1;
1777 BUG_ON(!PageLocked(page));
1778 if (likely(ind64 == ind)) {
1779 ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1780 &oid, iswiz(ind),
1781 page, &size, false, get_and_free);
1782 BUG_ON(ret >= 0 && size != PAGE_SIZE);
1783 }
1784 return ret;
1785 }
1786
1787 /* flush a single page from frontswap */
1788 static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
1789 {
1790 u64 ind64 = (u64)offset;
1791 u32 ind = (u32)offset;
1792 struct tmem_oid oid = oswiz(type, ind);
1793
1794 if (likely(ind64 == ind))
1795 (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1796 &oid, iswiz(ind));
1797 }
1798
1799 /* flush all pages from the passed swaptype */
1800 static void zcache_frontswap_flush_area(unsigned type)
1801 {
1802 struct tmem_oid oid;
1803 int ind;
1804
1805 for (ind = SWIZ_MASK; ind >= 0; ind--) {
1806 oid = oswiz(type, ind);
1807 (void)zcache_flush_object(LOCAL_CLIENT,
1808 zcache_frontswap_poolid, &oid);
1809 }
1810 }
1811
1812 static void zcache_frontswap_init(unsigned ignored)
1813 {
1814 /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
1815 if (zcache_frontswap_poolid < 0)
1816 zcache_frontswap_poolid =
1817 zcache_local_new_pool(TMEM_POOL_PERSIST);
1818 }
1819
1820 static struct frontswap_ops zcache_frontswap_ops = {
1821 .store = zcache_frontswap_put_page,
1822 .load = zcache_frontswap_get_page,
1823 .invalidate_page = zcache_frontswap_flush_page,
1824 .invalidate_area = zcache_frontswap_flush_area,
1825 .init = zcache_frontswap_init
1826 };
1827
1828 struct frontswap_ops zcache_frontswap_register_ops(void)
1829 {
1830 struct frontswap_ops old_ops =
1831 frontswap_register_ops(&zcache_frontswap_ops);
1832
1833 return old_ops;
1834 }
1835
1836 /*
1837 * zcache initialization
1838 * NOTE: for now, "zcache" or "ramster" MUST be provided as a kernel boot
1839 * parameter, or nothing happens!
1840 */
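/*
 * Illustrative usage (not part of the original source): appending "zcache"
 * to the kernel command line enables zcache with the default lzo crypto
 * compressor, and "ramster" enables the ramster variant.  A different
 * compressor can be named with, e.g.,
 *
 *   zcache=deflate
 *
 * which is honored only if that algorithm is available to the kernel
 * crypto API; otherwise zcache_comp_init() below falls back to lzo.
 */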
1841
1842 static int __init enable_zcache(char *s)
1843 {
1844 zcache_enabled = 1;
1845 return 1;
1846 }
1847 __setup("zcache", enable_zcache);
1848
1849 static int __init enable_ramster(char *s)
1850 {
1851 zcache_enabled = 1;
1852 #ifdef CONFIG_RAMSTER
1853 ramster_enabled = 1;
1854 #endif
1855 return 1;
1856 }
1857 __setup("ramster", enable_ramster);
1858
1859 /* allow independent dynamic disabling of cleancache and frontswap */
1860
1861 static int __init no_cleancache(char *s)
1862 {
1863 disable_cleancache = 1;
1864 return 1;
1865 }
1866
1867 __setup("nocleancache", no_cleancache);
1868
1869 static int __init no_frontswap(char *s)
1870 {
1871 disable_frontswap = 1;
1872 return 1;
1873 }
1874
1875 __setup("nofrontswap", no_frontswap);
1876
1877 static int __init no_frontswap_exclusive_gets(char *s)
1878 {
1879 frontswap_has_exclusive_gets = false;
1880 return 1;
1881 }
1882
1883 __setup("nofrontswapexclusivegets", no_frontswap_exclusive_gets);
1884
1885 static int __init no_frontswap_ignore_nonactive(char *s)
1886 {
1887 disable_frontswap_ignore_nonactive = 1;
1888 return 1;
1889 }
1890
1891 __setup("nofrontswapignorenonactive", no_frontswap_ignore_nonactive);
1892
1893 static int __init no_cleancache_ignore_nonactive(char *s)
1894 {
1895 disable_cleancache_ignore_nonactive = 1;
1896 return 1;
1897 }
1898
1899 __setup("nocleancacheignorenonactive", no_cleancache_ignore_nonactive);
1900
1901 static int __init enable_zcache_compressor(char *s)
1902 {
1903 strlcpy(zcache_comp_name, s, sizeof(zcache_comp_name));
1904 zcache_enabled = 1;
1905 return 1;
1906 }
1907 __setup("zcache=", enable_zcache_compressor);
1908
1909
1910 static int __init zcache_comp_init(void)
1911 {
1912 int ret = 0;
1913
1914 /* check crypto algorithm */
1915 if (*zcache_comp_name != '\0') {
1916 ret = crypto_has_comp(zcache_comp_name, 0, 0);
1917 if (!ret)
1918 pr_info("zcache: %s not supported\n",
1919 zcache_comp_name);
1920 }
1921 if (!ret)
1922 strcpy(zcache_comp_name, "lzo");
1923 ret = crypto_has_comp(zcache_comp_name, 0, 0);
1924 if (!ret) {
1925 ret = 1;
1926 goto out;
1927 }
1928 pr_info("zcache: using %s compressor\n", zcache_comp_name);
1929
1930 /* alloc percpu transforms */
1931 ret = 0;
1932 zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
1933 if (!zcache_comp_pcpu_tfms)
1934 ret = 1;
1935 out:
1936 return ret;
1937 }
1938
1939 static int __init zcache_init(void)
1940 {
1941 int ret = 0;
1942
1943 if (ramster_enabled) {
1944 namestr = "ramster";
1945 ramster_register_pamops(&zcache_pamops);
1946 }
1947 #ifdef CONFIG_DEBUG_FS
1948 zcache_debugfs_init();
1949 #endif
1950 if (zcache_enabled) {
1951 unsigned int cpu;
1952
1953 tmem_register_hostops(&zcache_hostops);
1954 tmem_register_pamops(&zcache_pamops);
1955 ret = register_cpu_notifier(&zcache_cpu_notifier_block);
1956 if (ret) {
1957 pr_err("%s: can't register cpu notifier\n", namestr);
1958 goto out;
1959 }
1960 ret = zcache_comp_init();
1961 if (ret) {
1962 pr_err("%s: compressor initialization failed\n",
1963 namestr);
1964 goto out;
1965 }
1966 for_each_online_cpu(cpu) {
1967 void *pcpu = (void *)(long)cpu;
1968 zcache_cpu_notifier(&zcache_cpu_notifier_block,
1969 CPU_UP_PREPARE, pcpu);
1970 }
1971 }
1972 zcache_objnode_cache = kmem_cache_create("zcache_objnode",
1973 sizeof(struct tmem_objnode), 0, 0, NULL);
1974 zcache_obj_cache = kmem_cache_create("zcache_obj",
1975 sizeof(struct tmem_obj), 0, 0, NULL);
1976 ret = zcache_new_client(LOCAL_CLIENT);
1977 if (ret) {
1978 pr_err("%s: can't create client\n", namestr);
1979 goto out;
1980 }
1981 zbud_init();
1982 if (zcache_enabled && !disable_cleancache) {
1983 struct cleancache_ops old_ops;
1984
1985 register_shrinker(&zcache_shrinker);
1986 old_ops = zcache_cleancache_register_ops();
1987 pr_info("%s: cleancache enabled using kernel transcendent "
1988 "memory and compression buddies\n", namestr);
1989 #ifdef ZCACHE_DEBUG
1990 pr_info("%s: cleancache: ignorenonactive = %d\n",
1991 namestr, !disable_cleancache_ignore_nonactive);
1992 #endif
1993 if (old_ops.init_fs != NULL)
1994 pr_warn("%s: cleancache_ops overridden\n", namestr);
1995 }
1996 if (zcache_enabled && !disable_frontswap) {
1997 struct frontswap_ops old_ops;
1998
1999 old_ops = zcache_frontswap_register_ops();
2000 if (frontswap_has_exclusive_gets)
2001 frontswap_tmem_exclusive_gets(true);
2002 pr_info("%s: frontswap enabled using kernel transcendent "
2003 "memory and compression buddies\n", namestr);
2004 #ifdef ZCACHE_DEBUG
2005 pr_info("%s: frontswap: excl gets = %d active only = %d\n",
2006 namestr, frontswap_has_exclusive_gets,
2007 !disable_frontswap_ignore_nonactive);
2008 #endif
2009 if (old_ops.init != NULL)
2010 pr_warn("%s: frontswap_ops overridden\n", namestr);
2011 }
2012 if (ramster_enabled)
2013 ramster_init(!disable_cleancache, !disable_frontswap,
2014 frontswap_has_exclusive_gets);
2015 out:
2016 return ret;
2017 }
2018
2019 late_initcall(zcache_init);