drivers/staging/zcache/zcache-main.c: fix build
1/*
2 * zcache.c
3 *
4 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
5 * Copyright (c) 2010,2011, Nitin Gupta
6 *
7 * Zcache provides an in-kernel "host implementation" for transcendent memory
8 * ("tmem") and, thus indirectly, for cleancache and frontswap. Zcache uses
9 * lzo1x compression to improve density and an embedded allocator called
10 * "zbud" which "buddies" two compressed pages semi-optimally in each physical
11 * pageframe. Zbud is integrally tied into tmem to allow pageframes to
12 * be "reclaimed" efficiently.
13 */
14
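/*
 * High-level flow (summarized from the code below): the cleancache and
 * frontswap shims near the end of this file translate page-cache and
 * swap-page operations into tmem put/get/flush calls; tmem in turn
 * stores each page as a compressed "zpage" packed by zbud, which fits
 * up to two zpages into a single pageframe.
 */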
15#include <linux/module.h>
16#include <linux/cpu.h>
17#include <linux/highmem.h>
18#include <linux/list.h>
19#include <linux/slab.h>
20#include <linux/spinlock.h>
21#include <linux/types.h>
22#include <linux/string.h>
23#include <linux/atomic.h>
24#include <linux/math64.h>
25#include <linux/crypto.h>
26#include <linux/swap.h>
27#include <linux/swapops.h>
28#include <linux/pagemap.h>
29#include <linux/writeback.h>
30
31#include <linux/cleancache.h>
32#include <linux/frontswap.h>
33#include "tmem.h"
34#include "zcache.h"
35#include "zbud.h"
36#include "ramster.h"
37#include "debug.h"
38#ifdef CONFIG_RAMSTER
39static bool ramster_enabled __read_mostly;
40#else
41#define ramster_enabled false
42#endif
43
44#ifndef __PG_WAS_ACTIVE
45static inline bool PageWasActive(struct page *page)
46{
47 return true;
48}
49
50static inline void SetPageWasActive(struct page *page)
51{
52}
53#endif
54
55#ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS
56static bool frontswap_has_exclusive_gets __read_mostly = true;
57#else
58static bool frontswap_has_exclusive_gets __read_mostly;
59static inline void frontswap_tmem_exclusive_gets(bool b)
60{
61}
62#endif
63
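/*
 * When FRONTSWAP_HAS_EXCLUSIVE_GETS is available, a frontswap load also
 * invalidates the zpage in zcache (zcache_frontswap_get_page passes
 * get_and_free == 1 below), so a persistent page is never duplicated in
 * both zcache and the swapcache.
 */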
64/* enable (or fix code) when Seth's patches are accepted upstream */
65#define zcache_writeback_enabled 0
66
67static bool zcache_enabled __read_mostly;
68static bool disable_cleancache __read_mostly;
69static bool disable_frontswap __read_mostly;
70static bool disable_frontswap_ignore_nonactive __read_mostly;
71static bool disable_cleancache_ignore_nonactive __read_mostly;
72static char *namestr __read_mostly = "zcache";
73
74#define ZCACHE_GFP_MASK \
75 (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
76
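/*
 * Note on ZCACHE_GFP_MASK: __GFP_WAIT is deliberately absent, so these
 * allocations never sleep (puts may arrive with IRQs disabled), while
 * __GFP_NORETRY/__GFP_NOWARN make failure cheap and quiet and
 * __GFP_NOMEMALLOC keeps zcache out of the emergency reserves.
 */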
77/* crypto API for zcache */
78#define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME
79static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly;
80static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly;
81
82enum comp_op {
83 ZCACHE_COMPOP_COMPRESS,
84 ZCACHE_COMPOP_DECOMPRESS
85};
86
87static inline int zcache_comp_op(enum comp_op op,
88 const u8 *src, unsigned int slen,
89 u8 *dst, unsigned int *dlen)
90{
91 struct crypto_comp *tfm;
92 int ret = -1;
93
94 BUG_ON(!zcache_comp_pcpu_tfms);
95 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
96 BUG_ON(!tfm);
97 switch (op) {
98 case ZCACHE_COMPOP_COMPRESS:
99 ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
100 break;
101 case ZCACHE_COMPOP_DECOMPRESS:
102 ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
103 break;
104 default:
105 ret = -EINVAL;
106 }
107 put_cpu();
108 return ret;
109}
110
111/*
112 * policy parameters
113 */
114
115/*
116 * byte count defining poor compression; pages with greater zsize will be
117 * rejected
118 */
119static unsigned int zbud_max_zsize __read_mostly = (PAGE_SIZE / 8) * 7;
120/*
121 * byte count defining poor *mean* compression; pages with greater zsize
122 * will be rejected until sufficient better-compressed pages are accepted
123 * driving the mean below this threshold
124 */
125static unsigned int zbud_max_mean_zsize __read_mostly = (PAGE_SIZE / 8) * 5;
126
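/*
 * Worked example, assuming 4KB pages: zbud_max_zsize is 3584 bytes and
 * zbud_max_mean_zsize is 2560 bytes, so a zpage that compresses to 3000
 * bytes is accepted only while the running mean zsize stays at or below
 * 2560 bytes; anything over 3584 bytes is always rejected.
 */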
127/*
128 * for now, use named slabs so we can easily track usage; later we can
129 * either just use kmalloc, or perhaps add a slab-like allocator
130 * to manage total memory utilization more carefully
131 */
132static struct kmem_cache *zcache_objnode_cache;
133static struct kmem_cache *zcache_obj_cache;
134
135static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };
136
137/* Used by debug.c */
138ssize_t zcache_pers_zpages;
139u64 zcache_pers_zbytes;
140ssize_t zcache_eph_pageframes;
141ssize_t zcache_pers_pageframes;
142
143/* Used by this code. */
144ssize_t zcache_last_active_file_pageframes;
145ssize_t zcache_last_inactive_file_pageframes;
146ssize_t zcache_last_active_anon_pageframes;
147ssize_t zcache_last_inactive_anon_pageframes;
148#ifdef CONFIG_ZCACHE_WRITEBACK
149ssize_t zcache_writtenback_pages;
150ssize_t zcache_outstanding_writeback_pages;
151#endif
152/*
153 * zcache core code starts here
154 */
155
156static struct zcache_client zcache_host;
157static struct zcache_client zcache_clients[MAX_CLIENTS];
158
159static inline bool is_local_client(struct zcache_client *cli)
160{
161 return cli == &zcache_host;
162}
163
164static struct zcache_client *zcache_get_client_by_id(uint16_t cli_id)
165{
166 struct zcache_client *cli = &zcache_host;
167
168 if (cli_id != LOCAL_CLIENT) {
169 if (cli_id >= MAX_CLIENTS)
170 goto out;
171 cli = &zcache_clients[cli_id];
172 }
173out:
174 return cli;
175}
176
177/*
178 * Tmem operations assume the poolid implies the invoking client.
179 * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
180 * RAMster has each client numbered by cluster node, and a KVM version
181 * of zcache would have one client per guest and each client might
182 * have a poolid==N.
183 */
184struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
185{
186 struct tmem_pool *pool = NULL;
187 struct zcache_client *cli = NULL;
188
189 cli = zcache_get_client_by_id(cli_id);
190 if (cli == NULL)
191 goto out;
192 if (!is_local_client(cli))
193 atomic_inc(&cli->refcount);
194 if (poolid < MAX_POOLS_PER_CLIENT) {
195 pool = cli->tmem_pools[poolid];
196 if (pool != NULL)
197 atomic_inc(&pool->refcount);
198 }
199out:
200 return pool;
201}
202
203void zcache_put_pool(struct tmem_pool *pool)
204{
205 struct zcache_client *cli = NULL;
206
207 if (pool == NULL)
208 BUG();
209 cli = pool->client;
210 atomic_dec(&pool->refcount);
211 if (!is_local_client(cli))
212 atomic_dec(&cli->refcount);
213}
214
215int zcache_new_client(uint16_t cli_id)
216{
217 struct zcache_client *cli;
218 int ret = -1;
219
220 cli = zcache_get_client_by_id(cli_id);
221 if (cli == NULL)
222 goto out;
223 if (cli->allocated)
224 goto out;
225 cli->allocated = 1;
226 ret = 0;
227out:
228 return ret;
229}
230
231/*
232 * zcache implementation for tmem host ops
233 */
234
235static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
236{
237 struct tmem_objnode *objnode = NULL;
238 struct zcache_preload *kp;
239 int i;
240
241 kp = &__get_cpu_var(zcache_preloads);
242 for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
243 objnode = kp->objnodes[i];
244 if (objnode != NULL) {
245 kp->objnodes[i] = NULL;
246 break;
247 }
248 }
249 BUG_ON(objnode == NULL);
250 inc_zcache_objnode_count();
251 return objnode;
252}
253
254static void zcache_objnode_free(struct tmem_objnode *objnode,
255 struct tmem_pool *pool)
256{
257 dec_zcache_objnode_count();
258 kmem_cache_free(zcache_objnode_cache, objnode);
259}
260
261static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
262{
263 struct tmem_obj *obj = NULL;
264 struct zcache_preload *kp;
265
266 kp = &__get_cpu_var(zcache_preloads);
267 obj = kp->obj;
268 BUG_ON(obj == NULL);
269 kp->obj = NULL;
270 inc_zcache_obj_count();
271 return obj;
272}
273
274static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
275{
276 dec_zcache_obj_count();
277 kmem_cache_free(zcache_obj_cache, obj);
278}
279
280static struct tmem_hostops zcache_hostops = {
281 .obj_alloc = zcache_obj_alloc,
282 .obj_free = zcache_obj_free,
283 .objnode_alloc = zcache_objnode_alloc,
284 .objnode_free = zcache_objnode_free,
285};
286
287static struct page *zcache_alloc_page(void)
288{
289 struct page *page = alloc_page(ZCACHE_GFP_MASK);
290
291 if (page != NULL)
292 inc_zcache_pageframes_alloced();
293 return page;
294}
295
296static void zcache_free_page(struct page *page)
297{
298 long curr_pageframes;
299 static long max_pageframes, min_pageframes;
300
301 if (page == NULL)
302 BUG();
303 __free_page(page);
304 inc_zcache_pageframes_freed();
305 curr_pageframes = curr_pageframes_count();
306 if (curr_pageframes > max_pageframes)
307 max_pageframes = curr_pageframes;
308 if (curr_pageframes < min_pageframes)
309 min_pageframes = curr_pageframes;
310#ifdef CONFIG_ZCACHE_DEBUG
311 if (curr_pageframes > 2L || curr_pageframes < -2L) {
312 /* pr_info here */
313 }
314#endif
315}
316
317/*
318 * zcache implementations for PAM page descriptor ops
319 */
320
321/* forward reference */
322static void zcache_compress(struct page *from,
323 void **out_va, unsigned *out_len);
324
325static struct page *zcache_evict_eph_pageframe(void);
326
327static void *zcache_pampd_eph_create(char *data, size_t size, bool raw,
328 struct tmem_handle *th)
329{
330 void *pampd = NULL, *cdata = data;
331 unsigned clen = size;
332 struct page *page = (struct page *)(data), *newpage;
333
334 if (!raw) {
335 zcache_compress(page, &cdata, &clen);
336 if (clen > zbud_max_buddy_size()) {
337 inc_zcache_compress_poor();
338 goto out;
339 }
340 } else {
341 BUG_ON(clen > zbud_max_buddy_size());
342 }
343
344 /* look for space via an existing match first */
345 pampd = (void *)zbud_match_prep(th, true, cdata, clen);
346 if (pampd != NULL)
347 goto got_pampd;
348
349 /* no match, now we need to find (or free up) a full page */
350 newpage = zcache_alloc_page();
351 if (newpage != NULL)
352 goto create_in_new_page;
353
354 inc_zcache_failed_getfreepages();
355 /* can't allocate a page, evict an ephemeral page via LRU */
356 newpage = zcache_evict_eph_pageframe();
357 if (newpage == NULL) {
358 inc_zcache_eph_ate_tail_failed();
359 goto out;
360 }
361 inc_zcache_eph_ate_tail();
362
363create_in_new_page:
364 pampd = (void *)zbud_create_prep(th, true, cdata, clen, newpage);
365 BUG_ON(pampd == NULL);
366 inc_zcache_eph_pageframes();
367
368got_pampd:
369 inc_zcache_eph_zbytes(clen);
370 inc_zcache_eph_zpages();
371 if (ramster_enabled && raw)
372 ramster_count_foreign_pages(true, 1);
373out:
374 return pampd;
375}
376
377static void *zcache_pampd_pers_create(char *data, size_t size, bool raw,
378 struct tmem_handle *th)
379{
380 void *pampd = NULL, *cdata = data;
381 unsigned clen = size;
382 struct page *page = (struct page *)(data), *newpage;
383 unsigned long zbud_mean_zsize;
384 unsigned long curr_pers_zpages, total_zsize;
385
386 if (data == NULL) {
387 BUG_ON(!ramster_enabled);
388 goto create_pampd;
389 }
390 curr_pers_zpages = zcache_pers_zpages;
391/* FIXME CONFIG_RAMSTER... subtract atomic remote_pers_pages here? */
392 if (!raw)
393 zcache_compress(page, &cdata, &clen);
394 /* reject if compression is too poor */
395 if (clen > zbud_max_zsize) {
396 inc_zcache_compress_poor();
397 goto out;
398 }
399 /* reject if mean compression is too poor */
400 if ((clen > zbud_max_mean_zsize) && (curr_pers_zpages > 0)) {
401 total_zsize = zcache_pers_zbytes;
402 if ((long)total_zsize < 0)
403 total_zsize = 0;
404 zbud_mean_zsize = div_u64(total_zsize,
405 curr_pers_zpages);
406 if (zbud_mean_zsize > zbud_max_mean_zsize) {
407 inc_zcache_mean_compress_poor();
408 goto out;
409 }
410 }
411
412create_pampd:
413 /* look for space via an existing match first */
414 pampd = (void *)zbud_match_prep(th, false, cdata, clen);
415 if (pampd != NULL)
416 goto got_pampd;
417
418 /* no match, now we need to find (or free up) a full page */
419 newpage = zcache_alloc_page();
420 if (newpage != NULL)
421 goto create_in_new_page;
422 /*
423 * FIXME do the following only if eph is oversized?
424 * if (zcache_eph_pageframes >
425 * (global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE) +
426 * global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE)))
427 */
428 inc_zcache_failed_getfreepages();
429 /* can't allocate a page, evict an ephemeral page via LRU */
430 newpage = zcache_evict_eph_pageframe();
431 if (newpage == NULL) {
432 inc_zcache_pers_ate_eph_failed();
433 goto out;
434 }
435 inc_zcache_pers_ate_eph();
436
437create_in_new_page:
438 pampd = (void *)zbud_create_prep(th, false, cdata, clen, newpage);
439 BUG_ON(pampd == NULL);
440 inc_zcache_pers_pageframes();
441
442got_pampd:
443 inc_zcache_pers_zpages();
444 inc_zcache_pers_zbytes(clen);
445 if (ramster_enabled && raw)
446 ramster_count_foreign_pages(false, 1);
447out:
448 return pampd;
449}
450
451/*
452 * This is called directly from zcache_put_page to pre-allocate space
453 * to store a zpage.
454 */
455void *zcache_pampd_create(char *data, unsigned int size, bool raw,
456 int eph, struct tmem_handle *th)
457{
458 void *pampd = NULL;
459 struct zcache_preload *kp;
460 struct tmem_objnode *objnode;
461 struct tmem_obj *obj;
462 int i;
463
464 BUG_ON(!irqs_disabled());
465 /* pre-allocate per-cpu metadata */
466 BUG_ON(zcache_objnode_cache == NULL);
467 BUG_ON(zcache_obj_cache == NULL);
468 kp = &__get_cpu_var(zcache_preloads);
469 for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
470 objnode = kp->objnodes[i];
471 if (objnode == NULL) {
472 objnode = kmem_cache_alloc(zcache_objnode_cache,
473 ZCACHE_GFP_MASK);
474 if (unlikely(objnode == NULL)) {
475 inc_zcache_failed_alloc();
476 goto out;
477 }
478 kp->objnodes[i] = objnode;
479 }
480 }
481 if (kp->obj == NULL) {
482 obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
483 kp->obj = obj;
484 }
485 if (unlikely(kp->obj == NULL)) {
486 inc_zcache_failed_alloc();
487 goto out;
488 }
489 /*
490 * ok, we have all the metadata pre-allocated; now do the data,
491 * but since how we allocate the data depends on whether it is
492 * ephemeral or persistent, we split the call into sub-functions
493 */
494 if (eph)
495 pampd = zcache_pampd_eph_create(data, size, raw, th);
496 else
497 pampd = zcache_pampd_pers_create(data, size, raw, th);
498out:
499 return pampd;
500}
501
502/*
503 * This is a pamops called via tmem_put and is necessary to "finish"
504 * a pampd creation.
505 */
506void zcache_pampd_create_finish(void *pampd, bool eph)
507{
508 zbud_create_finish((struct zbudref *)pampd, eph);
509}
510
511/*
512 * This is passed as a function parameter to zbud_decompress so that
513 * zbud need not be familiar with the details of crypto. It assumes that
514 * the bytes from_va and to_va through from_va+size-1 and to_va+size-1 are
515 * kmapped. It must be successful, else there is a logic bug somewhere.
516 */
517static void zcache_decompress(char *from_va, unsigned int size, char *to_va)
518{
519 int ret;
520 unsigned int outlen = PAGE_SIZE;
521
522 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size,
523 to_va, &outlen);
524 BUG_ON(ret);
525 BUG_ON(outlen != PAGE_SIZE);
526}
527
528/*
529 * Decompress from the kernel va to a pageframe
530 */
531void zcache_decompress_to_page(char *from_va, unsigned int size,
532 struct page *to_page)
533{
534 char *to_va = kmap_atomic(to_page);
535 zcache_decompress(from_va, size, to_va);
536 kunmap_atomic(to_va);
537}
538
539/*
540 * fill the pageframe corresponding to the struct page with the data
541 * from the passed pampd
542 */
543static int zcache_pampd_get_data(char *data, size_t *sizep, bool raw,
544 void *pampd, struct tmem_pool *pool,
545 struct tmem_oid *oid, uint32_t index)
546{
547 int ret;
548 bool eph = !is_persistent(pool);
549
550 BUG_ON(preemptible());
551 BUG_ON(eph); /* fix later if shared pools get implemented */
552 BUG_ON(pampd_is_remote(pampd));
553 if (raw)
554 ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
555 sizep, eph);
556 else {
557 ret = zbud_decompress((struct page *)(data),
558 (struct zbudref *)pampd, false,
559 zcache_decompress);
560 *sizep = PAGE_SIZE;
561 }
562 return ret;
563}
564
565/*
566 * fill the pageframe corresponding to the struct page with the data
567 * from the passed pampd
568 */
569static int zcache_pampd_get_data_and_free(char *data, size_t *sizep, bool raw,
570 void *pampd, struct tmem_pool *pool,
571 struct tmem_oid *oid, uint32_t index)
572{
573 int ret;
574 bool eph = !is_persistent(pool);
575 struct page *page = NULL;
576 unsigned int zsize, zpages;
577
578 BUG_ON(preemptible());
579 BUG_ON(pampd_is_remote(pampd));
580 if (raw)
581 ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
582 sizep, eph);
583 else {
584 ret = zbud_decompress((struct page *)(data),
585 (struct zbudref *)pampd, eph,
586 zcache_decompress);
587 *sizep = PAGE_SIZE;
588 }
589 page = zbud_free_and_delist((struct zbudref *)pampd, eph,
590 &zsize, &zpages);
591 if (eph) {
592 if (page)
593 dec_zcache_eph_pageframes();
594 dec_zcache_eph_zpages(zpages);
595 dec_zcache_eph_zbytes(zsize);
596 } else {
597 if (page)
598 dec_zcache_pers_pageframes();
599 dec_zcache_pers_zpages(zpages);
600 dec_zcache_pers_zbytes(zsize);
601 }
602 if (!is_local_client(pool->client))
603 ramster_count_foreign_pages(eph, -1);
604 if (page)
605 zcache_free_page(page);
606 return ret;
607}
608
609/*
610 * free the pampd and remove it from any zcache lists
611 * pampd must no longer be pointed to from any tmem data structures!
612 */
613static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
614 struct tmem_oid *oid, uint32_t index, bool acct)
615{
616 struct page *page = NULL;
617 unsigned int zsize, zpages;
618
619 BUG_ON(preemptible());
620 if (pampd_is_remote(pampd)) {
621 BUG_ON(!ramster_enabled);
622 pampd = ramster_pampd_free(pampd, pool, oid, index, acct);
623 if (pampd == NULL)
624 return;
625 }
626 if (is_ephemeral(pool)) {
627 page = zbud_free_and_delist((struct zbudref *)pampd,
628 true, &zsize, &zpages);
629 if (page)
630 dec_zcache_eph_pageframes();
631 dec_zcache_eph_zpages(zpages);
632 dec_zcache_eph_zbytes(zsize);
633 /* FIXME CONFIG_RAMSTER... check acct parameter? */
634 } else {
635 page = zbud_free_and_delist((struct zbudref *)pampd,
636 false, &zsize, &zpages);
637 if (page)
638 dec_zcache_pers_pageframes();
639 dec_zcache_pers_zpages(zpages);
640 dec_zcache_pers_zbytes(zsize);
641 }
642 if (!is_local_client(pool->client))
643 ramster_count_foreign_pages(is_ephemeral(pool), -1);
644 if (page)
645 zcache_free_page(page);
646}
647
648static struct tmem_pamops zcache_pamops = {
649 .create_finish = zcache_pampd_create_finish,
650 .get_data = zcache_pampd_get_data,
651 .get_data_and_free = zcache_pampd_get_data_and_free,
652 .free = zcache_pampd_free,
653};
654
655/*
656 * zcache compression/decompression and related per-cpu stuff
657 */
658
659static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
660#define ZCACHE_DSTMEM_ORDER 1
661
662static void zcache_compress(struct page *from, void **out_va, unsigned *out_len)
663{
664 int ret;
665 unsigned char *dmem = __get_cpu_var(zcache_dstmem);
666 char *from_va;
667
668 BUG_ON(!irqs_disabled());
669 /* no buffer or no compressor so can't compress */
670 BUG_ON(dmem == NULL);
671 *out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER;
672 from_va = kmap_atomic(from);
673 mb();
674 ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem,
675 out_len);
676 BUG_ON(ret);
677 *out_va = dmem;
678 kunmap_atomic(from_va);
679}
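/*
 * Notes on the above: callers must have IRQs disabled; the source page
 * is kmapped and compressed into a per-cpu buffer of
 * PAGE_SIZE << ZCACHE_DSTMEM_ORDER bytes, so even worst-case expansion
 * by the compressor cannot overrun it.  The caller then compares the
 * returned length against the zbud/zsize thresholds before deciding
 * whether to keep the zpage.
 */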
680
681static int zcache_comp_cpu_up(int cpu)
682{
683 struct crypto_comp *tfm;
684
685 tfm = crypto_alloc_comp(zcache_comp_name, 0, 0);
686 if (IS_ERR(tfm))
687 return NOTIFY_BAD;
688 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
689 return NOTIFY_OK;
690}
691
692static void zcache_comp_cpu_down(int cpu)
693{
694 struct crypto_comp *tfm;
695
696 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
697 crypto_free_comp(tfm);
698 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
699}
700
701static int zcache_cpu_notifier(struct notifier_block *nb,
702 unsigned long action, void *pcpu)
703{
704 int ret, i, cpu = (long)pcpu;
705 struct zcache_preload *kp;
706
707 switch (action) {
708 case CPU_UP_PREPARE:
709 ret = zcache_comp_cpu_up(cpu);
710 if (ret != NOTIFY_OK) {
711 pr_err("%s: can't allocate compressor xform\n",
712 namestr);
713 return ret;
714 }
715 per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
716 GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER);
717 if (ramster_enabled)
718 ramster_cpu_up(cpu);
719 break;
720 case CPU_DEAD:
721 case CPU_UP_CANCELED:
722 zcache_comp_cpu_down(cpu);
723 free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
724 ZCACHE_DSTMEM_ORDER);
725 per_cpu(zcache_dstmem, cpu) = NULL;
726 kp = &per_cpu(zcache_preloads, cpu);
727 for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
728 if (kp->objnodes[i])
729 kmem_cache_free(zcache_objnode_cache,
730 kp->objnodes[i]);
731 }
732 if (kp->obj) {
733 kmem_cache_free(zcache_obj_cache, kp->obj);
734 kp->obj = NULL;
735 }
736 if (ramster_enabled)
737 ramster_cpu_down(cpu);
738 break;
739 default:
740 break;
741 }
742 return NOTIFY_OK;
743}
744
745static struct notifier_block zcache_cpu_notifier_block = {
746 .notifier_call = zcache_cpu_notifier
747};
748
749/*
750 * The following code interacts with the zbud eviction and zbud
751 * zombify code to access LRU pages
752 */
753
754static struct page *zcache_evict_eph_pageframe(void)
755{
756 struct page *page;
757 unsigned int zsize = 0, zpages = 0;
758
759 page = zbud_evict_pageframe_lru(&zsize, &zpages);
760 if (page == NULL)
761 goto out;
762 dec_zcache_eph_zbytes(zsize);
763 dec_zcache_eph_zpages(zpages);
764 inc_zcache_evicted_eph_zpages(zpages);
765 dec_zcache_eph_pageframes();
766 inc_zcache_evicted_eph_pageframes();
767out:
768 return page;
769}
770
771#ifdef CONFIG_ZCACHE_WRITEBACK
772
773static atomic_t zcache_outstanding_writeback_pages_atomic = ATOMIC_INIT(0);
774
775static inline void inc_zcache_outstanding_writeback_pages(void)
776{
777 zcache_outstanding_writeback_pages =
778 atomic_inc_return(&zcache_outstanding_writeback_pages_atomic);
779}
780static inline void dec_zcache_outstanding_writeback_pages(void)
781{
782 zcache_outstanding_writeback_pages =
783 atomic_dec_return(&zcache_outstanding_writeback_pages_atomic);
784};
785static void unswiz(struct tmem_oid oid, u32 index,
786 unsigned *type, pgoff_t *offset);
787
788/*
789 * Choose an LRU persistent pageframe and attempt to write it back to
790 * the backing swap disk by calling frontswap_writeback on both zpages.
791 *
792 * This is work-in-progress.
793 */
794
795static void zcache_end_swap_write(struct bio *bio, int err)
796{
797 end_swap_bio_write(bio, err);
798 dec_zcache_outstanding_writeback_pages();
799 zcache_writtenback_pages++;
800}
801
802/*
803 * zcache_get_swap_cache_page
804 *
805 * This is an adaptation of read_swap_cache_async()
806 *
807 * If success, page is returned in retpage
808 * Returns 0 if page was already in the swap cache, page is not locked
809 * Returns 1 if the new page needs to be populated, page is locked
810 */
811static int zcache_get_swap_cache_page(int type, pgoff_t offset,
812 struct page *new_page)
813{
814 struct page *found_page;
815 swp_entry_t entry = swp_entry(type, offset);
816 int err;
817
818 BUG_ON(new_page == NULL);
819 do {
820 /*
821 * First check the swap cache. Since this is normally
822 * called after lookup_swap_cache() failed, re-calling
823 * that would confuse statistics.
824 */
825 found_page = find_get_page(&swapper_space, entry.val);
826 if (found_page)
827 return 0;
828
829 /*
830 * call radix_tree_preload() while we can wait.
831 */
832 err = radix_tree_preload(GFP_KERNEL);
833 if (err)
834 break;
835
836 /*
837 * Swap entry may have been freed since our caller observed it.
838 */
839 err = swapcache_prepare(entry);
840 if (err == -EEXIST) { /* seems racy */
841 radix_tree_preload_end();
842 continue;
843 }
844 if (err) { /* swp entry is obsolete ? */
845 radix_tree_preload_end();
846 break;
847 }
848
849 /* May fail (-ENOMEM) if radix-tree node allocation failed. */
850 __set_page_locked(new_page);
851 SetPageSwapBacked(new_page);
852 err = __add_to_swap_cache(new_page, entry);
853 if (likely(!err)) {
854 radix_tree_preload_end();
855 lru_cache_add_anon(new_page);
856 return 1;
857 }
858 radix_tree_preload_end();
859 ClearPageSwapBacked(new_page);
860 __clear_page_locked(new_page);
861 /*
862 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
863 * clear SWAP_HAS_CACHE flag.
864 */
865 swapcache_free(entry, NULL);
866 /* FIXME: is it possible to get here without err==-ENOMEM?
867 * If not, we can dispense with the do loop, use goto retry */
868 } while (err != -ENOMEM);
869
870 return -ENOMEM;
871}
872
873/*
874 * Given a frontswap zpage in zcache (identified by type/offset) and
875 * an empty page, put the page into the swap cache, use frontswap
876 * to get the page from zcache into the empty page, then give it
877 * to the swap subsystem to send to disk (carefully avoiding the
878 * possibility that frontswap might snatch it back).
879 * Returns < 0 if error, 0 if successful, and 1 if successful but
880 * the newpage passed in not needed and should be freed.
881 */
882static int zcache_frontswap_writeback_zpage(int type, pgoff_t offset,
883 struct page *newpage)
884{
885 struct page *page = newpage;
886 int ret;
887 struct writeback_control wbc = {
888 .sync_mode = WB_SYNC_NONE,
889 };
890
891 ret = zcache_get_swap_cache_page(type, offset, page);
892 if (ret < 0)
893 return ret;
894 else if (ret == 0) {
895 /* more uptodate page is already in swapcache */
896 __frontswap_invalidate_page(type, offset);
897 return 1;
898 }
899
900 BUG_ON(!frontswap_has_exclusive_gets); /* load must also invalidate */
901 /* FIXME: how is it possible to get here when page is unlocked? */
902 __frontswap_load(page);
903 SetPageUptodate(page); /* above does SetPageDirty, is that enough? */
904
905 /* start writeback */
906 SetPageReclaim(page);
907 /*
908 * Return value is ignored here because it doesn't change anything
909 * for us. Page is returned unlocked.
910 */
911 (void)__swap_writepage(page, &wbc, zcache_end_swap_write);
912 page_cache_release(page);
913 inc_zcache_outstanding_writeback_pages();
914
915 return 0;
916}
917
918/*
919 * The following is still a magic number... we want to allow forward progress
920 * for writeback because it clears out needed RAM when under pressure, but
921 * we don't want to allow writeback to absorb and queue too many GFP_KERNEL
922 * pages if the swap device is very slow.
923 */
924#define ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES 6400
925
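/*
 * For scale: with 4KB pages, 6400 outstanding order-0 writeback pages
 * corresponds to roughly 25MB of swap I/O in flight before further
 * writeback is throttled.
 */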
926/*
927 * Try to allocate two free pages, first using a non-aggressive alloc,
928 * then by evicting zcache ephemeral (clean pagecache) pages, and last
929 * by aggressive GFP_KERNEL alloc. We allow zbud to choose a pageframe
930 * consisting of 1-2 zbuds/zpages, then call the writeback_zpage helper
931 * function above for each.
932 */
933static int zcache_frontswap_writeback(void)
934{
935 struct tmem_handle th[2];
936 int ret = 0;
937 int nzbuds, writeback_ret;
938 unsigned type;
939 struct page *znewpage1 = NULL, *znewpage2 = NULL;
940 struct page *evictpage1 = NULL, *evictpage2 = NULL;
941 struct page *newpage1 = NULL, *newpage2 = NULL;
942 struct page *page1 = NULL, *page2 = NULL;
943 pgoff_t offset;
944
945 znewpage1 = alloc_page(ZCACHE_GFP_MASK);
946 znewpage2 = alloc_page(ZCACHE_GFP_MASK);
947 if (znewpage1 == NULL)
948 evictpage1 = zcache_evict_eph_pageframe();
949 if (znewpage2 == NULL)
950 evictpage2 = zcache_evict_eph_pageframe();
951
952 if ((evictpage1 == NULL || evictpage2 == NULL) &&
953 atomic_read(&zcache_outstanding_writeback_pages_atomic) >
954 ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES) {
955 goto free_and_out;
956 }
957 if (znewpage1 == NULL && evictpage1 == NULL)
958 newpage1 = alloc_page(GFP_KERNEL);
959 if (znewpage2 == NULL && evictpage2 == NULL)
960 newpage2 = alloc_page(GFP_KERNEL);
961 if (newpage1 == NULL || newpage2 == NULL)
962 goto free_and_out;
963
964 /* ok, we have two pageframes pre-allocated, get a pair of zbuds */
965 nzbuds = zbud_make_zombie_lru(&th[0], NULL, NULL, false);
966 if (nzbuds == 0) {
967 ret = -ENOENT;
968 goto free_and_out;
969 }
970
971 /* process the first zbud */
972 unswiz(th[0].oid, th[0].index, &type, &offset);
973 page1 = (znewpage1 != NULL) ? znewpage1 :
974 ((newpage1 != NULL) ? newpage1 : evictpage1);
975 writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page1);
976 if (writeback_ret < 0) {
977 ret = -ENOMEM;
978 goto free_and_out;
979 }
980 if (evictpage1 != NULL)
981 zcache_pageframes_freed =
982 atomic_inc_return(&zcache_pageframes_freed_atomic);
983 if (writeback_ret == 0) {
984 /* zcache_get_swap_cache_page will free, don't double free */
985 znewpage1 = NULL;
986 newpage1 = NULL;
987 evictpage1 = NULL;
988 }
989 if (nzbuds < 2)
990 goto free_and_out;
991
992 /* if there is a second zbud, process it */
993 unswiz(th[1].oid, th[1].index, &type, &offset);
994 page2 = (znewpage2 != NULL) ? znewpage2 :
995 ((newpage2 != NULL) ? newpage2 : evictpage2);
996 writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page2);
997 if (writeback_ret < 0) {
998 ret = -ENOMEM;
999 goto free_and_out;
1000 }
1001 if (evictpage2 != NULL)
1002 zcache_pageframes_freed =
1003 atomic_inc_return(&zcache_pageframes_freed_atomic);
1004 if (writeback_ret == 0) {
1005 znewpage2 = NULL;
1006 newpage2 = NULL;
1007 evictpage2 = NULL;
1008 }
1009
1010free_and_out:
1011 if (znewpage1 != NULL)
1012 page_cache_release(znewpage1);
1013 if (znewpage2 != NULL)
1014 page_cache_release(znewpage2);
1015 if (newpage1 != NULL)
1016 page_cache_release(newpage1);
1017 if (newpage2 != NULL)
1018 page_cache_release(newpage2);
1019 if (evictpage1 != NULL)
1020 zcache_free_page(evictpage1);
1021 if (evictpage2 != NULL)
1022 zcache_free_page(evictpage2);
1023 return ret;
1024}
1025#endif /* CONFIG_ZCACHE_WRITEBACK */
1026
1027/*
1028 * When zcache is disabled ("frozen"), pools can be created and destroyed,
1029 * but all puts (and thus all other operations that require memory allocation)
1030 * must fail. If zcache is unfrozen, accepts puts, then frozen again,
1031 * data consistency requires all puts while frozen to be converted into
1032 * flushes.
1033 */
1034static bool zcache_freeze;
1035
1036/*
1037 * This zcache shrinker interface reduces the number of ephemeral pageframes
1038 * used by zcache to approximately the same as the total number of LRU_FILE
1039 * pageframes in use, and now also reduces the number of persistent pageframes
1040 * used by zcache to approximately the same as the total number of LRU_ANON
1041 * pageframes in use. FIXME POLICY: Probably the writeback should only occur
1042 * if the eviction doesn't free enough pages.
1043 */
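/*
 * Example of the policy above: if zcache currently holds 10000 ephemeral
 * pageframes while the file LRU lists hold 8000 pageframes, the next
 * shrink pass tries to evict roughly 2000 ephemeral pageframes, and
 * analogously targets persistent pageframes against the anon LRU when
 * CONFIG_ZCACHE_WRITEBACK is enabled.
 */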
1044static int shrink_zcache_memory(struct shrinker *shrink,
1045 struct shrink_control *sc)
1046{
1047 static bool in_progress;
1048 int ret = -1;
1049 int nr = sc->nr_to_scan;
1050 int nr_evict = 0;
1051 int nr_writeback = 0;
1052 struct page *page;
1053 int file_pageframes_inuse, anon_pageframes_inuse;
1054
1055 if (nr <= 0)
1056 goto skip_evict;
1057
1058 /* don't allow more than one eviction thread at a time */
1059 if (in_progress)
1060 goto skip_evict;
1061
1062 in_progress = true;
1063
1064 /* we are going to ignore nr, and target a different value */
1065 zcache_last_active_file_pageframes =
1066 global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
1067 zcache_last_inactive_file_pageframes =
1068 global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
1069 file_pageframes_inuse = zcache_last_active_file_pageframes +
1070 zcache_last_inactive_file_pageframes;
1071 if (zcache_eph_pageframes > file_pageframes_inuse)
1072 nr_evict = zcache_eph_pageframes - file_pageframes_inuse;
1073 else
1074 nr_evict = 0;
1075 while (nr_evict-- > 0) {
1076 page = zcache_evict_eph_pageframe();
1077 if (page == NULL)
1078 break;
1079 zcache_free_page(page);
1080 }
1081
1082 zcache_last_active_anon_pageframes =
1083 global_page_state(NR_LRU_BASE + LRU_ACTIVE_ANON);
1084 zcache_last_inactive_anon_pageframes =
1085 global_page_state(NR_LRU_BASE + LRU_INACTIVE_ANON);
1086 anon_pageframes_inuse = zcache_last_active_anon_pageframes +
1087 zcache_last_inactive_anon_pageframes;
1088 if (zcache_pers_pageframes > anon_pageframes_inuse)
1089 nr_writeback = zcache_pers_pageframes - anon_pageframes_inuse;
1090 else
1091 nr_writeback = 0;
1092 while (nr_writeback-- > 0) {
1093#ifdef CONFIG_ZCACHE_WRITEBACK
1094 int writeback_ret;
1095 writeback_ret = zcache_frontswap_writeback();
1096 if (writeback_ret == -ENOMEM)
1097#endif
1098 break;
1099 }
1100 in_progress = false;
1101
1102skip_evict:
1103 /* resample: has changed, but maybe not all the way yet */
1104 zcache_last_active_file_pageframes =
1105 global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
1106 zcache_last_inactive_file_pageframes =
1107 global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
1108 ret = zcache_eph_pageframes - zcache_last_active_file_pageframes +
1109 zcache_last_inactive_file_pageframes;
1110 if (ret < 0)
1111 ret = 0;
1112 return ret;
1113}
1114
1115static struct shrinker zcache_shrinker = {
1116 .shrink = shrink_zcache_memory,
1117 .seeks = DEFAULT_SEEKS,
1118};
1119
1120/*
1121 * zcache shims between cleancache/frontswap ops and tmem
1122 */
1123
1124/* FIXME rename these core routines to zcache_tmemput etc? */
1125int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
1126 uint32_t index, void *page,
1127 unsigned int size, bool raw, int ephemeral)
1128{
1129 struct tmem_pool *pool;
1130 struct tmem_handle th;
1131 int ret = -1;
1132 void *pampd = NULL;
1133
1134 BUG_ON(!irqs_disabled());
1135 pool = zcache_get_pool_by_id(cli_id, pool_id);
1136 if (unlikely(pool == NULL))
1137 goto out;
1138 if (!zcache_freeze) {
1139 ret = 0;
1140 th.client_id = cli_id;
1141 th.pool_id = pool_id;
1142 th.oid = *oidp;
1143 th.index = index;
1144 pampd = zcache_pampd_create((char *)page, size, raw,
1145 ephemeral, &th);
1146 if (pampd == NULL) {
1147 ret = -ENOMEM;
1148 if (ephemeral)
1149 inc_zcache_failed_eph_puts();
1150 else
1151 inc_zcache_failed_pers_puts();
1152 } else {
1153 if (ramster_enabled)
1154 ramster_do_preload_flnode(pool);
1155 ret = tmem_put(pool, oidp, index, 0, pampd);
1156 if (ret < 0)
1157 BUG();
1158 }
1159 zcache_put_pool(pool);
1160 } else {
1161 inc_zcache_put_to_flush();
1162 if (ramster_enabled)
1163 ramster_do_preload_flnode(pool);
1164 if (atomic_read(&pool->obj_count) > 0)
1165 /* the put fails whether the flush succeeds or not */
1166 (void)tmem_flush_page(pool, oidp, index);
1167 zcache_put_pool(pool);
1168 }
1169out:
1170 return ret;
1171}
1172
1173int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
1174 uint32_t index, void *page,
1175 size_t *sizep, bool raw, int get_and_free)
1176{
1177 struct tmem_pool *pool;
1178 int ret = -1;
1179 bool eph;
1180
1181 if (!raw) {
1182 BUG_ON(irqs_disabled());
1183 BUG_ON(in_softirq());
1184 }
1185 pool = zcache_get_pool_by_id(cli_id, pool_id);
1186 eph = is_ephemeral(pool);
1187 if (likely(pool != NULL)) {
1188 if (atomic_read(&pool->obj_count) > 0)
1189 ret = tmem_get(pool, oidp, index, (char *)(page),
1190 sizep, raw, get_and_free);
1191 zcache_put_pool(pool);
1192 }
1193 WARN_ONCE((!is_ephemeral(pool) && (ret != 0)),
1194 "zcache_get fails on persistent pool, "
1195 "bad things are very likely to happen soon\n");
1196#ifdef RAMSTER_TESTING
1197 if (ret != 0 && ret != -1 && !(ret == -EINVAL && is_ephemeral(pool)))
1198 pr_err("TESTING zcache_get tmem_get returns ret=%d\n", ret);
1199#endif
1200 return ret;
1201}
1202
1203int zcache_flush_page(int cli_id, int pool_id,
1204 struct tmem_oid *oidp, uint32_t index)
1205{
1206 struct tmem_pool *pool;
1207 int ret = -1;
1208 unsigned long flags;
1209
1210 local_irq_save(flags);
1211 inc_zcache_flush_total();
1212 pool = zcache_get_pool_by_id(cli_id, pool_id);
1213 if (ramster_enabled)
1214 ramster_do_preload_flnode(pool);
1215 if (likely(pool != NULL)) {
1216 if (atomic_read(&pool->obj_count) > 0)
1217 ret = tmem_flush_page(pool, oidp, index);
1218 zcache_put_pool(pool);
1219 }
1220 if (ret >= 0)
1221 inc_zcache_flush_found();
1222 local_irq_restore(flags);
1223 return ret;
1224}
1225
1226int zcache_flush_object(int cli_id, int pool_id,
1227 struct tmem_oid *oidp)
1228{
1229 struct tmem_pool *pool;
1230 int ret = -1;
1231 unsigned long flags;
1232
1233 local_irq_save(flags);
1234 inc_zcache_flobj_total();
1235 pool = zcache_get_pool_by_id(cli_id, pool_id);
1236 if (ramster_enabled)
1237 ramster_do_preload_flnode(pool);
1238 if (likely(pool != NULL)) {
1239 if (atomic_read(&pool->obj_count) > 0)
1240 ret = tmem_flush_object(pool, oidp);
1241 zcache_put_pool(pool);
1242 }
1243 if (ret >= 0)
1244 inc_zcache_flobj_found();
1245 local_irq_restore(flags);
1246 return ret;
1247}
1248
1249static int zcache_client_destroy_pool(int cli_id, int pool_id)
1250{
1251 struct tmem_pool *pool = NULL;
1252 struct zcache_client *cli = NULL;
1253 int ret = -1;
1254
1255 if (pool_id < 0)
1256 goto out;
1257 if (cli_id == LOCAL_CLIENT)
1258 cli = &zcache_host;
1259 else if ((unsigned int)cli_id < MAX_CLIENTS)
1260 cli = &zcache_clients[cli_id];
1261 if (cli == NULL)
1262 goto out;
1263 atomic_inc(&cli->refcount);
1264 pool = cli->tmem_pools[pool_id];
1265 if (pool == NULL)
1266 goto out;
1267 cli->tmem_pools[pool_id] = NULL;
1268 /* wait for pool activity on other cpus to quiesce */
1269 while (atomic_read(&pool->refcount) != 0)
1270 ;
1271 atomic_dec(&cli->refcount);
1272 local_bh_disable();
1273 ret = tmem_destroy_pool(pool);
1274 local_bh_enable();
1275 kfree(pool);
1276 if (cli_id == LOCAL_CLIENT)
1277 pr_info("%s: destroyed local pool id=%d\n", namestr, pool_id);
1278 else
1279 pr_info("%s: destroyed pool id=%d, client=%d\n",
1280 namestr, pool_id, cli_id);
1281out:
1282 return ret;
1283}
1284
1285int zcache_new_pool(uint16_t cli_id, uint32_t flags)
1286{
1287 int poolid = -1;
1288 struct tmem_pool *pool;
1289 struct zcache_client *cli = NULL;
1290
1291 if (cli_id == LOCAL_CLIENT)
1292 cli = &zcache_host;
1293 else if ((unsigned int)cli_id < MAX_CLIENTS)
1294 cli = &zcache_clients[cli_id];
1295 if (cli == NULL)
1296 goto out;
1297 atomic_inc(&cli->refcount);
1298 pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC);
1299 if (pool == NULL)
1300 goto out;
1301
1302 for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
1303 if (cli->tmem_pools[poolid] == NULL)
1304 break;
1305 if (poolid >= MAX_POOLS_PER_CLIENT) {
1306 pr_info("%s: pool creation failed: max exceeded\n", namestr);
1307 kfree(pool);
1308 poolid = -1;
1309 goto out;
1310 }
1311 atomic_set(&pool->refcount, 0);
1312 pool->client = cli;
1313 pool->pool_id = poolid;
1314 tmem_new_pool(pool, flags);
1315 cli->tmem_pools[poolid] = pool;
1316 if (cli_id == LOCAL_CLIENT)
1317 pr_info("%s: created %s local tmem pool, id=%d\n", namestr,
1318 flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
1319 poolid);
1320 else
1321 pr_info("%s: created %s tmem pool, id=%d, client=%d\n", namestr,
1322 flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
1323 poolid, cli_id);
1324out:
1325 if (cli != NULL)
1326 atomic_dec(&cli->refcount);
1327 return poolid;
1328}
1329
1330static int zcache_local_new_pool(uint32_t flags)
1331{
1332 return zcache_new_pool(LOCAL_CLIENT, flags);
1333}
1334
1335int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph)
1336{
1337 struct tmem_pool *pool;
1338 struct zcache_client *cli;
1339 uint32_t flags = eph ? 0 : TMEM_POOL_PERSIST;
1340 int ret = -1;
1341
1342 BUG_ON(!ramster_enabled);
1343 if (cli_id == LOCAL_CLIENT)
1344 goto out;
1345 if (pool_id >= MAX_POOLS_PER_CLIENT)
1346 goto out;
1347 if (cli_id >= MAX_CLIENTS)
1348 goto out;
1349
1350 cli = &zcache_clients[cli_id];
1351 if ((eph && disable_cleancache) || (!eph && disable_frontswap)) {
1352 pr_err("zcache_autocreate_pool: pool type disabled\n");
1353 goto out;
1354 }
1355 if (!cli->allocated) {
1356 if (zcache_new_client(cli_id)) {
1357 pr_err("zcache_autocreate_pool: can't create client\n");
1358 goto out;
1359 }
1360 cli = &zcache_clients[cli_id];
1361 }
1362 atomic_inc(&cli->refcount);
1363 pool = cli->tmem_pools[pool_id];
1364 if (pool != NULL) {
1365 if (pool->persistent && eph) {
1366 pr_err("zcache_autocreate_pool: type mismatch\n");
1367 goto out;
1368 }
1369 ret = 0;
1370 goto out;
1371 }
1372 pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
1373 if (pool == NULL)
1374 goto out;
1375
1376 atomic_set(&pool->refcount, 0);
1377 pool->client = cli;
1378 pool->pool_id = pool_id;
1379 tmem_new_pool(pool, flags);
1380 cli->tmem_pools[pool_id] = pool;
1381 pr_info("%s: AUTOcreated %s tmem poolid=%d, for remote client=%d\n",
1382 namestr, flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
1383 pool_id, cli_id);
1384 ret = 0;
1385out:
1386 if (cli != NULL)
1387 atomic_dec(&cli->refcount);
1388 return ret;
1389}
1390
1391/**********
1392 * Two kernel functionalities currently can be layered on top of tmem.
1393 * These are "cleancache" which is used as a second-chance cache for clean
1394 * page cache pages; and "frontswap" which is used for swap pages
1395 * to avoid writes to disk. A generic "shim" is provided here for each
1396 * to translate in-kernel semantics to zcache semantics.
1397 */
1398
1399static void zcache_cleancache_put_page(int pool_id,
1400 struct cleancache_filekey key,
1401 pgoff_t index, struct page *page)
1402{
1403 u32 ind = (u32) index;
1404 struct tmem_oid oid = *(struct tmem_oid *)&key;
1405
1406 if (!disable_cleancache_ignore_nonactive && !PageWasActive(page)) {
1407 inc_zcache_eph_nonactive_puts_ignored();
1408 return;
1409 }
1410 if (likely(ind == index))
1411 (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index,
1412 page, PAGE_SIZE, false, 1);
1413}
1414
1415static int zcache_cleancache_get_page(int pool_id,
1416 struct cleancache_filekey key,
1417 pgoff_t index, struct page *page)
1418{
1419 u32 ind = (u32) index;
1420 struct tmem_oid oid = *(struct tmem_oid *)&key;
1421 size_t size;
1422 int ret = -1;
1423
1424 if (likely(ind == index)) {
1425 ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index,
1426 page, &size, false, 0);
1427 BUG_ON(ret >= 0 && size != PAGE_SIZE);
1428 if (ret == 0)
1429 SetPageWasActive(page);
1430 }
1431 return ret;
1432}
1433
1434static void zcache_cleancache_flush_page(int pool_id,
1435 struct cleancache_filekey key,
1436 pgoff_t index)
1437{
1438 u32 ind = (u32) index;
1439 struct tmem_oid oid = *(struct tmem_oid *)&key;
1440
1441 if (likely(ind == index))
1442 (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
1443}
1444
1445static void zcache_cleancache_flush_inode(int pool_id,
1446 struct cleancache_filekey key)
1447{
1448 struct tmem_oid oid = *(struct tmem_oid *)&key;
1449
1450 (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
1451}
1452
1453static void zcache_cleancache_flush_fs(int pool_id)
1454{
1455 if (pool_id >= 0)
1456 (void)zcache_client_destroy_pool(LOCAL_CLIENT, pool_id);
1457}
1458
1459static int zcache_cleancache_init_fs(size_t pagesize)
1460{
1461 BUG_ON(sizeof(struct cleancache_filekey) !=
1462 sizeof(struct tmem_oid));
1463 BUG_ON(pagesize != PAGE_SIZE);
1464 return zcache_local_new_pool(0);
1465}
1466
1467static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
1468{
1469 /* shared pools are unsupported and map to private */
1470 BUG_ON(sizeof(struct cleancache_filekey) !=
1471 sizeof(struct tmem_oid));
1472 BUG_ON(pagesize != PAGE_SIZE);
1473 return zcache_local_new_pool(0);
1474}
1475
1476static struct cleancache_ops zcache_cleancache_ops = {
1477 .put_page = zcache_cleancache_put_page,
1478 .get_page = zcache_cleancache_get_page,
1479 .invalidate_page = zcache_cleancache_flush_page,
1480 .invalidate_inode = zcache_cleancache_flush_inode,
1481 .invalidate_fs = zcache_cleancache_flush_fs,
1482 .init_shared_fs = zcache_cleancache_init_shared_fs,
1483 .init_fs = zcache_cleancache_init_fs
1484};
1485
1486struct cleancache_ops zcache_cleancache_register_ops(void)
1487{
1488 struct cleancache_ops old_ops =
1489 cleancache_register_ops(&zcache_cleancache_ops);
1490
1491 return old_ops;
1492}
1493
1494/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
1495static int zcache_frontswap_poolid __read_mostly = -1;
1496
1497/*
1498 * Swizzling increases objects per swaptype, increasing tmem concurrency
1499 * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
1500 * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
1501 * frontswap_get_page(), but has side-effects. Hence using 8.
1502 */
1503#define SWIZ_BITS 8
1504#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
1505#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
1506#define iswiz(_ind) (_ind >> SWIZ_BITS)
1507
1508static inline struct tmem_oid oswiz(unsigned type, u32 ind)
1509{
1510 struct tmem_oid oid = { .oid = { 0 } };
1511 oid.oid[0] = _oswiz(type, ind);
1512 return oid;
1513}
1514
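/*
 * Worked example with SWIZ_BITS == 8: for swap type 1 and page offset
 * 0x12345, oswiz() yields oid.oid[0] == ((1 << 8) | 0x45) == 0x145 and
 * iswiz() yields tmem index 0x123; unswiz() below recombines these into
 * type 1, offset 0x12345.
 */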
1515#ifdef CONFIG_ZCACHE_WRITEBACK
1516static void unswiz(struct tmem_oid oid, u32 index,
1517 unsigned *type, pgoff_t *offset)
1518{
1519 *type = (unsigned)(oid.oid[0] >> SWIZ_BITS);
1520 *offset = (pgoff_t)((index << SWIZ_BITS) |
1521 (oid.oid[0] & SWIZ_MASK));
1522}
1523#endif
1524
1525static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
1526 struct page *page)
1527{
1528 u64 ind64 = (u64)offset;
1529 u32 ind = (u32)offset;
1530 struct tmem_oid oid = oswiz(type, ind);
1531 int ret = -1;
1532 unsigned long flags;
1533
1534 BUG_ON(!PageLocked(page));
1535 if (!disable_frontswap_ignore_nonactive && !PageWasActive(page)) {
1536 inc_zcache_pers_nonactive_puts_ignored();
1537 ret = -ERANGE;
1538 goto out;
1539 }
1540 if (likely(ind64 == ind)) {
1541 local_irq_save(flags);
1542 ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1543 &oid, iswiz(ind),
1544 page, PAGE_SIZE, false, 0);
1545 local_irq_restore(flags);
1546 }
1547out:
1548 return ret;
1549}
1550
1551/* returns 0 if the page was successfully fetched from frontswap, -1 if
1552 * it was not present (should never happen!) */
1553static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
1554 struct page *page)
1555{
1556 u64 ind64 = (u64)offset;
1557 u32 ind = (u32)offset;
1558 struct tmem_oid oid = oswiz(type, ind);
1559 size_t size;
1560 int ret = -1, get_and_free;
1561
1562 if (frontswap_has_exclusive_gets)
1563 get_and_free = 1;
1564 else
1565 get_and_free = -1;
1566 BUG_ON(!PageLocked(page));
1567 if (likely(ind64 == ind)) {
1568 ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1569 &oid, iswiz(ind),
1570 page, &size, false, get_and_free);
1571 BUG_ON(ret >= 0 && size != PAGE_SIZE);
1572 }
1573 return ret;
1574}
1575
1576/* flush a single page from frontswap */
1577static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
1578{
1579 u64 ind64 = (u64)offset;
1580 u32 ind = (u32)offset;
1581 struct tmem_oid oid = oswiz(type, ind);
1582
1583 if (likely(ind64 == ind))
1584 (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1585 &oid, iswiz(ind));
1586}
1587
1588/* flush all pages from the passed swaptype */
1589static void zcache_frontswap_flush_area(unsigned type)
1590{
1591 struct tmem_oid oid;
1592 int ind;
1593
1594 for (ind = SWIZ_MASK; ind >= 0; ind--) {
1595 oid = oswiz(type, ind);
1596 (void)zcache_flush_object(LOCAL_CLIENT,
1597 zcache_frontswap_poolid, &oid);
1598 }
1599}
1600
1601static void zcache_frontswap_init(unsigned ignored)
1602{
1603 /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
1604 if (zcache_frontswap_poolid < 0)
1605 zcache_frontswap_poolid =
1606 zcache_local_new_pool(TMEM_POOL_PERSIST);
1607}
1608
1609static struct frontswap_ops zcache_frontswap_ops = {
1610 .store = zcache_frontswap_put_page,
1611 .load = zcache_frontswap_get_page,
1612 .invalidate_page = zcache_frontswap_flush_page,
1613 .invalidate_area = zcache_frontswap_flush_area,
1614 .init = zcache_frontswap_init
1615};
1616
1617struct frontswap_ops zcache_frontswap_register_ops(void)
1618{
1619 struct frontswap_ops old_ops =
1620 frontswap_register_ops(&zcache_frontswap_ops);
1621
1622 return old_ops;
1623}
1624
1625/*
1626 * zcache initialization
1627 * NOTE FOR NOW zcache or ramster MUST BE PROVIDED AS A KERNEL BOOT PARAMETER
1628 * OR NOTHING HAPPENS!
1629 */
1630
1631static int __init enable_zcache(char *s)
1632{
1633 zcache_enabled = true;
1634 return 1;
1635}
1636__setup("zcache", enable_zcache);
1637
1638static int __init enable_ramster(char *s)
1639{
1640 zcache_enabled = true;
1641#ifdef CONFIG_RAMSTER
1642 ramster_enabled = true;
1643#endif
1644 return 1;
1645}
1646__setup("ramster", enable_ramster);
1647
1648/* allow independent dynamic disabling of cleancache and frontswap */
1649
1650static int __init no_cleancache(char *s)
1651{
1652 disable_cleancache = true;
1653 return 1;
1654}
1655
1656__setup("nocleancache", no_cleancache);
1657
1658static int __init no_frontswap(char *s)
1659{
1660 disable_frontswap = true;
1661 return 1;
1662}
1663
1664__setup("nofrontswap", no_frontswap);
1665
1666static int __init no_frontswap_exclusive_gets(char *s)
1667{
1668 frontswap_has_exclusive_gets = false;
1669 return 1;
1670}
1671
1672__setup("nofrontswapexclusivegets", no_frontswap_exclusive_gets);
1673
1674static int __init no_frontswap_ignore_nonactive(char *s)
1675{
1676 disable_frontswap_ignore_nonactive = true;
1677 return 1;
1678}
1679
1680__setup("nofrontswapignorenonactive", no_frontswap_ignore_nonactive);
1681
1682static int __init no_cleancache_ignore_nonactive(char *s)
1683{
1684 disable_cleancache_ignore_nonactive = true;
1685 return 1;
1686}
1687
1688__setup("nocleancacheignorenonactive", no_cleancache_ignore_nonactive);
1689
1690static int __init enable_zcache_compressor(char *s)
1691{
1692 strlcpy(zcache_comp_name, s, sizeof(zcache_comp_name));
1693 zcache_enabled = true;
1694 return 1;
1695}
1696__setup("zcache=", enable_zcache_compressor);
1697
1698
1699static int __init zcache_comp_init(void)
1700{
1701 int ret = 0;
1702
1703 /* check crypto algorithm */
1704 if (*zcache_comp_name != '\0') {
1705 ret = crypto_has_comp(zcache_comp_name, 0, 0);
1706 if (!ret)
1707 pr_info("zcache: %s not supported\n",
1708 zcache_comp_name);
1709 }
1710 if (!ret)
1711 strcpy(zcache_comp_name, "lzo");
1712 ret = crypto_has_comp(zcache_comp_name, 0, 0);
1713 if (!ret) {
1714 ret = 1;
1715 goto out;
1716 }
1717 pr_info("zcache: using %s compressor\n", zcache_comp_name);
1718
1719 /* alloc percpu transforms */
1720 ret = 0;
1721 zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
1722 if (!zcache_comp_pcpu_tfms)
1723 ret = 1;
1724out:
1725 return ret;
1726}
1727
1728static int __init zcache_init(void)
1729{
1730 int ret = 0;
1731
1732 if (ramster_enabled) {
1733 namestr = "ramster";
1734 ramster_register_pamops(&zcache_pamops);
1735 }
1736#ifdef CONFIG_DEBUG_FS
1737 zcache_debugfs_init();
1738#endif
1739 if (zcache_enabled) {
1740 unsigned int cpu;
1741
1742 tmem_register_hostops(&zcache_hostops);
1743 tmem_register_pamops(&zcache_pamops);
1744 ret = register_cpu_notifier(&zcache_cpu_notifier_block);
1745 if (ret) {
1746 pr_err("%s: can't register cpu notifier\n", namestr);
1747 goto out;
1748 }
1749 ret = zcache_comp_init();
1750 if (ret) {
1751 pr_err("%s: compressor initialization failed\n",
1752 namestr);
1753 goto out;
1754 }
1755 for_each_online_cpu(cpu) {
1756 void *pcpu = (void *)(long)cpu;
1757 zcache_cpu_notifier(&zcache_cpu_notifier_block,
1758 CPU_UP_PREPARE, pcpu);
1759 }
1760 }
1761 zcache_objnode_cache = kmem_cache_create("zcache_objnode",
1762 sizeof(struct tmem_objnode), 0, 0, NULL);
1763 zcache_obj_cache = kmem_cache_create("zcache_obj",
1764 sizeof(struct tmem_obj), 0, 0, NULL);
1765 ret = zcache_new_client(LOCAL_CLIENT);
1766 if (ret) {
1767 pr_err("%s: can't create client\n", namestr);
1768 goto out;
1769 }
1770 zbud_init();
1771 if (zcache_enabled && !disable_cleancache) {
1772 struct cleancache_ops old_ops;
1773
1774 register_shrinker(&zcache_shrinker);
1775 old_ops = zcache_cleancache_register_ops();
1776 pr_info("%s: cleancache enabled using kernel transcendent "
1777 "memory and compression buddies\n", namestr);
1778#ifdef CONFIG_ZCACHE_DEBUG
1779 pr_info("%s: cleancache: ignorenonactive = %d\n",
1780 namestr, !disable_cleancache_ignore_nonactive);
1781#endif
1782 if (old_ops.init_fs != NULL)
1783 pr_warn("%s: cleancache_ops overridden\n", namestr);
1784 }
1785 if (zcache_enabled && !disable_frontswap) {
1786 struct frontswap_ops old_ops;
1787
1788 old_ops = zcache_frontswap_register_ops();
1789 if (frontswap_has_exclusive_gets)
1790 frontswap_tmem_exclusive_gets(true);
1791 pr_info("%s: frontswap enabled using kernel transcendent "
1792 "memory and compression buddies\n", namestr);
1793#ifdef CONFIG_ZCACHE_DEBUG
1794 pr_info("%s: frontswap: excl gets = %d active only = %d\n",
1795 namestr, frontswap_has_exclusive_gets,
1796 !disable_frontswap_ignore_nonactive);
1797#endif
1798 if (old_ops.init != NULL)
1799 pr_warn("%s: frontswap_ops overridden\n", namestr);
1800 }
1801 if (ramster_enabled)
1802 ramster_init(!disable_cleancache, !disable_frontswap,
1803 frontswap_has_exclusive_gets);
1804out:
1805 return ret;
1806}
1807
1808late_initcall(zcache_init);