/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Client Lustre Page.
 *
 * Author: Nikita Danilov <nikita.danilov@sun.com>
 * Author: Jinshan Xiong <jinshan.xiong@intel.com>
 */

#define DEBUG_SUBSYSTEM S_CLASS

#include "../../include/linux/libcfs/libcfs.h"
#include "../include/obd_class.h"
#include "../include/obd_support.h"
#include <linux/list.h>

#include "../include/cl_object.h"
#include "cl_internal.h"

static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);

# define PASSERT(env, page, expr)					\
	do {								\
		if (unlikely(!(expr))) {				\
			CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
			LASSERT(0);					\
		}							\
	} while (0)

# define PINVRNT(env, page, exp) \
	((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))

/**
 * Internal version of cl_page_get().
 *
 * This function can be used to obtain an initial reference to a previously
 * unreferenced cached object. It can be called only if concurrent page
 * reclamation is somehow prevented, e.g., by keeping a lock on the VM page
 * associated with \a page.
 *
 * Use with care! Not exported.
 */
static void cl_page_get_trust(struct cl_page *page)
{
	LASSERT(atomic_read(&page->cp_ref) > 0);
	atomic_inc(&page->cp_ref);
}

/**
 * Returns a slice within a page, corresponding to the given layer in the
 * device stack.
 *
 * \see cl_lock_at()
 */
static const struct cl_page_slice *
cl_page_at_trusted(const struct cl_page *page,
		   const struct lu_device_type *dtype)
{
	const struct cl_page_slice *slice;

	list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
		if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
			return slice;
	}
	return NULL;
}

static void cl_page_free(const struct lu_env *env, struct cl_page *page)
{
	struct cl_object *obj = page->cp_obj;

	PASSERT(env, page, list_empty(&page->cp_batch));
	PASSERT(env, page, !page->cp_owner);
	PASSERT(env, page, !page->cp_req);
	PASSERT(env, page, page->cp_state == CPS_FREEING);

	while (!list_empty(&page->cp_layers)) {
		struct cl_page_slice *slice;

		slice = list_entry(page->cp_layers.next,
				   struct cl_page_slice, cpl_linkage);
		list_del_init(page->cp_layers.next);
		if (unlikely(slice->cpl_ops->cpo_fini))
			slice->cpl_ops->cpo_fini(env, slice);
	}
	lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
	cl_object_put(env, obj);
	lu_ref_fini(&page->cp_reference);
	kfree(page);
}

/**
 * Helper function updating page state. This is the only place in the code
 * where cl_page::cp_state field is mutated.
 */
static inline void cl_page_state_set_trust(struct cl_page *page,
					   enum cl_page_state state)
{
	/* bypass const. */
	*(enum cl_page_state *)&page->cp_state = state;
}

struct cl_page *cl_page_alloc(const struct lu_env *env,
			      struct cl_object *o, pgoff_t ind,
			      struct page *vmpage,
			      enum cl_page_type type)
{
	struct cl_page *page;
	struct lu_object_header *head;

	page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
	if (page) {
		int result = 0;

		atomic_set(&page->cp_ref, 1);
		page->cp_obj = o;
		cl_object_get(o);
		lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
				     page);
		page->cp_vmpage = vmpage;
		cl_page_state_set_trust(page, CPS_CACHED);
		page->cp_type = type;
		INIT_LIST_HEAD(&page->cp_layers);
		INIT_LIST_HEAD(&page->cp_batch);
		INIT_LIST_HEAD(&page->cp_flight);
		lu_ref_init(&page->cp_reference);
		head = o->co_lu.lo_header;
		list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
			if (o->co_ops->coo_page_init) {
				result = o->co_ops->coo_page_init(env, o, page,
								  ind);
				if (result != 0) {
					cl_page_delete0(env, page);
					cl_page_free(env, page);
					page = ERR_PTR(result);
					break;
				}
			}
		}
	} else {
		page = ERR_PTR(-ENOMEM);
	}
	return page;
}
EXPORT_SYMBOL(cl_page_alloc);

/**
 * Returns a cl_page with index \a idx at the object \a o, and associated with
 * the VM page \a vmpage.
 *
 * This is the main entry point into the cl_page caching interface. First, a
 * cache (implemented as a per-object radix tree) is consulted. If the page is
 * found there, it is returned immediately. Otherwise a new page is allocated
 * and returned. In either case, an additional reference to the page is
 * acquired.
 *
 * \see cl_object_find(), cl_lock_find()
 */
struct cl_page *cl_page_find(const struct lu_env *env,
			     struct cl_object *o,
			     pgoff_t idx, struct page *vmpage,
			     enum cl_page_type type)
{
	struct cl_page *page = NULL;
	struct cl_object_header *hdr;

	LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
	might_sleep();

	hdr = cl_object_header(o);

	CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
	       idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
	/* fast path. */
	if (type == CPT_CACHEABLE) {
		/*
		 * vmpage lock is used to protect the child/parent
		 * relationship
		 */
		KLASSERT(PageLocked(vmpage));
		/*
		 * cl_vmpage_page() can be called here without any locks as
		 *
		 * - "vmpage" is locked (which prevents ->private from
		 *   concurrent updates), and
		 *
		 * - "o" cannot be destroyed while current thread holds a
		 *   reference on it.
		 */
		page = cl_vmpage_page(vmpage, o);

		if (page)
			return page;
	}

	/* allocate and initialize cl_page */
	page = cl_page_alloc(env, o, idx, vmpage, type);
	return page;
}
EXPORT_SYMBOL(cl_page_find);
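
/*
 * Illustrative usage sketch (not compiled here): how a caller might look up
 * or create the cl_page for an already-locked VM page and drop its reference
 * afterwards. The "env", "obj" and "vmpage" values are assumed to be supplied
 * by the surrounding IO context; error handling is abbreviated.
 *
 *	struct cl_page *page;
 *
 *	KLASSERT(PageLocked(vmpage));
 *	page = cl_page_find(env, obj, vmpage->index, vmpage, CPT_CACHEABLE);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	... use the page ...
 *	cl_page_put(env, page);
 */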

static inline int cl_page_invariant(const struct cl_page *pg)
{
	/*
	 * Page invariant is protected by a VM lock.
	 */
	LINVRNT(cl_page_is_vmlocked(NULL, pg));

	return cl_page_in_use_noref(pg);
}

static void cl_page_state_set0(const struct lu_env *env,
			       struct cl_page *page, enum cl_page_state state)
{
	enum cl_page_state old;

	/*
	 * Matrix of allowed state transitions [old][new], for sanity
	 * checking.
	 */
	static const int allowed_transitions[CPS_NR][CPS_NR] = {
		[CPS_CACHED] = {
			[CPS_CACHED]  = 0,
			[CPS_OWNED]   = 1, /* io finds existing cached page */
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 1, /* write-out from the cache */
			[CPS_FREEING] = 1, /* eviction on memory pressure */
		},
		[CPS_OWNED] = {
			[CPS_CACHED]  = 1, /* release to the cache */
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 1, /* start read immediately */
			[CPS_PAGEOUT] = 1, /* start write immediately */
			[CPS_FREEING] = 1, /* lock invalidation or truncate */
		},
		[CPS_PAGEIN] = {
			[CPS_CACHED]  = 1, /* io completion */
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 0,
			[CPS_FREEING] = 0,
		},
		[CPS_PAGEOUT] = {
			[CPS_CACHED]  = 1, /* io completion */
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 0,
			[CPS_FREEING] = 0,
		},
		[CPS_FREEING] = {
			[CPS_CACHED]  = 0,
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 0,
			[CPS_FREEING] = 0,
		}
	};
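
	/*
	 * Reading the matrix above: allowed_transitions[CPS_CACHED][CPS_OWNED]
	 * is 1, so an IO may take ownership of a cached page, while the
	 * CPS_PAGEIN and CPS_PAGEOUT rows only permit the return to CPS_CACHED
	 * at IO completion; no other transition is legal while a transfer is
	 * in flight.
	 */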

	old = page->cp_state;
	PASSERT(env, page, allowed_transitions[old][state]);
	CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
	PASSERT(env, page, page->cp_state == old);
	PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
	cl_page_state_set_trust(page, state);
}

static void cl_page_state_set(const struct lu_env *env,
			      struct cl_page *page, enum cl_page_state state)
{
	cl_page_state_set0(env, page, state);
}

/**
 * Acquires an additional reference to a page.
 *
 * This can be called only by a caller already possessing a reference to \a
 * page.
 *
 * \see cl_object_get(), cl_lock_get().
 */
void cl_page_get(struct cl_page *page)
{
	cl_page_get_trust(page);
}
EXPORT_SYMBOL(cl_page_get);

/**
 * Releases a reference to a page.
 *
 * When the last reference is released, the page is returned to the cache,
 * unless it is in cl_page_state::CPS_FREEING state, in which case it is
 * immediately destroyed.
 *
 * \see cl_object_put(), cl_lock_put().
 */
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
	CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
		       atomic_read(&page->cp_ref));

	if (atomic_dec_and_test(&page->cp_ref)) {
		LASSERT(page->cp_state == CPS_FREEING);

		LASSERT(atomic_read(&page->cp_ref) == 0);
		PASSERT(env, page, !page->cp_owner);
		PASSERT(env, page, list_empty(&page->cp_batch));
		/*
		 * Page is no longer reachable by other threads. Tear
		 * it down.
		 */
		cl_page_free(env, page);
	}
}
EXPORT_SYMBOL(cl_page_put);
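
/*
 * Reference-counting note (added commentary): every cl_page_get(), and every
 * page obtained from cl_page_find()/cl_page_alloc(), must eventually be
 * balanced by a cl_page_put(); the final put of a page that has reached
 * CPS_FREEING is what actually triggers cl_page_free().
 */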

/**
 * Returns the cl_page associated with a VM page for the given cl_object.
 */
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
{
	struct cl_page *page;

	KLASSERT(PageLocked(vmpage));

	/*
	 * NOTE: absence of races and liveness of data are guaranteed by page
	 * lock on a "vmpage". That works because object destruction has
	 * bottom-to-top pass.
	 */

	page = (struct cl_page *)vmpage->private;
	if (page) {
		cl_page_get_trust(page);
		LASSERT(page->cp_type == CPT_CACHEABLE);
	}
	return page;
}
EXPORT_SYMBOL(cl_vmpage_page);

const struct cl_page_slice *cl_page_at(const struct cl_page *page,
				       const struct lu_device_type *dtype)
{
	return cl_page_at_trusted(page, dtype);
}
EXPORT_SYMBOL(cl_page_at);

#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)

#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)			\
({									\
	const struct lu_env *__env = (_env);				\
	struct cl_page *__page = (_page);				\
	const struct cl_page_slice *__scan;				\
	int __result;							\
	ptrdiff_t __op = (_op);						\
	int (*__method)_proto;						\
									\
	__result = 0;							\
	list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {	\
		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
		if (__method) {						\
			__result = (*__method)(__env, __scan, ## __VA_ARGS__); \
			if (__result != 0)				\
				break;					\
		}							\
	}								\
	if (__result > 0)						\
		__result = 0;						\
	__result;							\
})

#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...)		\
({									\
	const struct lu_env *__env = (_env);				\
	struct cl_page *__page = (_page);				\
	const struct cl_page_slice *__scan;				\
	int __result;							\
	ptrdiff_t __op = (_op);						\
	int (*__method)_proto;						\
									\
	__result = 0;							\
	list_for_each_entry_reverse(__scan, &__page->cp_layers,	\
				    cpl_linkage) {			\
		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
		if (__method) {						\
			__result = (*__method)(__env, __scan, ## __VA_ARGS__); \
			if (__result != 0)				\
				break;					\
		}							\
	}								\
	if (__result > 0)						\
		__result = 0;						\
	__result;							\
})

#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)			\
do {									\
	const struct lu_env *__env = (_env);				\
	struct cl_page *__page = (_page);				\
	const struct cl_page_slice *__scan;				\
	ptrdiff_t __op = (_op);						\
	void (*__method)_proto;						\
									\
	list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {	\
		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
		if (__method)						\
			(*__method)(__env, __scan, ## __VA_ARGS__);	\
	}								\
} while (0)

#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)		\
do {									\
	const struct lu_env *__env = (_env);				\
	struct cl_page *__page = (_page);				\
	const struct cl_page_slice *__scan;				\
	ptrdiff_t __op = (_op);						\
	void (*__method)_proto;						\
									\
	list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
		if (__method)						\
			(*__method)(__env, __scan, ## __VA_ARGS__);	\
	}								\
} while (0)
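
/*
 * Dispatch note (added commentary): CL_PAGE_OP() records the byte offset of a
 * method within struct cl_page_operations, and the CL_PAGE_INVOKE*() /
 * CL_PAGE_INVOID*() macros walk cp_layers and call that method on every slice
 * whose ops table provides it, with the "INVOKE" variants stopping at the
 * first non-zero result.
 */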

static int cl_page_invoke(const struct lu_env *env,
			  struct cl_io *io, struct cl_page *page, ptrdiff_t op)

{
	PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
	return CL_PAGE_INVOKE(env, page, op,
			      (const struct lu_env *,
			       const struct cl_page_slice *, struct cl_io *),
			      io);
}

static void cl_page_invoid(const struct lu_env *env,
			   struct cl_io *io, struct cl_page *page, ptrdiff_t op)

{
	PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
	CL_PAGE_INVOID(env, page, op,
		       (const struct lu_env *,
			const struct cl_page_slice *, struct cl_io *), io);
}

static void cl_page_owner_clear(struct cl_page *page)
{
	if (page->cp_owner) {
		LASSERT(page->cp_owner->ci_owned_nr > 0);
		page->cp_owner->ci_owned_nr--;
		page->cp_owner = NULL;
	}
}

static void cl_page_owner_set(struct cl_page *page)
{
	page->cp_owner->ci_owned_nr++;
}

void cl_page_disown0(const struct lu_env *env,
		     struct cl_io *io, struct cl_page *pg)
{
	enum cl_page_state state;

	state = pg->cp_state;
	PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
	PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
	cl_page_owner_clear(pg);

	if (state == CPS_OWNED)
		cl_page_state_set(env, pg, CPS_CACHED);
	/*
	 * Completion call-backs are executed in the bottom-up order, so that
	 * the uppermost layer (llite), responsible for VFS/VM interaction,
	 * runs last and can release locks safely.
	 */
	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
			       (const struct lu_env *,
				const struct cl_page_slice *, struct cl_io *),
			       io);
}

/**
 * Returns true iff the page is owned by the given io.
 */
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
{
	struct cl_io *top = cl_io_top((struct cl_io *)io);

	LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
	return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
}
EXPORT_SYMBOL(cl_page_is_owned);

/**
 * Try to own a page by IO.
 *
 * Waits until the page is in cl_page_state::CPS_CACHED state, and then
 * switches it into cl_page_state::CPS_OWNED state.
 *
 * \pre  !cl_page_is_owned(pg, io)
 * \post result == 0 iff cl_page_is_owned(pg, io)
 *
 * \retval 0   success
 *
 * \retval -ve failure, e.g., the page was destroyed (and landed in
 *	       cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
 *	       or the page was owned by another thread, or is in IO.
 *
 * \see cl_page_disown()
 * \see cl_page_operations::cpo_own()
 * \see cl_page_own_try()
 * \see cl_page_own
 */
static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
			struct cl_page *pg, int nonblock)
{
	int result;

	PINVRNT(env, pg, !cl_page_is_owned(pg, io));

	io = cl_io_top(io);

	if (pg->cp_state == CPS_FREEING) {
		result = -ENOENT;
	} else {
		result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
					(const struct lu_env *,
					 const struct cl_page_slice *,
					 struct cl_io *, int),
					io, nonblock);
		if (result == 0) {
			PASSERT(env, pg, !pg->cp_owner);
			PASSERT(env, pg, !pg->cp_req);
			pg->cp_owner = cl_io_top(io);
			cl_page_owner_set(pg);
			if (pg->cp_state != CPS_FREEING) {
				cl_page_state_set(env, pg, CPS_OWNED);
			} else {
				cl_page_disown0(env, io, pg);
				result = -ENOENT;
			}
		}
	}
	PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
	return result;
}

/**
 * Own a page, might be blocked.
 *
 * \see cl_page_own0()
 */
int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
{
	return cl_page_own0(env, io, pg, 0);
}
EXPORT_SYMBOL(cl_page_own);

/**
 * Nonblock version of cl_page_own().
 *
 * \see cl_page_own0()
 */
int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
		    struct cl_page *pg)
{
	return cl_page_own0(env, io, pg, 1);
}
EXPORT_SYMBOL(cl_page_own_try);
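
/*
 * Illustrative sketch (not part of this file's API surface): the typical
 * ownership round-trip an IO performs on a page. The "env", "io" and "page"
 * values are assumed to come from the caller; error paths are abbreviated.
 *
 *	int rc;
 *
 *	rc = cl_page_own(env, io, page);
 *	if (rc == 0) {
 *		... page is CPS_OWNED: safe to modify or queue for transfer ...
 *		cl_page_disown(env, io, page);
 *	} else {
 *		... rc == -ENOENT: the page entered CPS_FREEING meanwhile ...
 *	}
 */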

/**
 * Assume page ownership.
 *
 * Called when page is already locked by the hosting VM.
 *
 * \pre  !cl_page_is_owned(pg, io)
 * \post cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_assume()
 */
void cl_page_assume(const struct lu_env *env,
		    struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));

	io = cl_io_top(io);

	cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
	PASSERT(env, pg, !pg->cp_owner);
	pg->cp_owner = cl_io_top(io);
	cl_page_owner_set(pg);
	cl_page_state_set(env, pg, CPS_OWNED);
}
EXPORT_SYMBOL(cl_page_assume);

/**
 * Releases page ownership without unlocking the page.
 *
 * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
 * underlying VM page (as VM is supposed to do this itself).
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_assume()
 */
void cl_page_unassume(const struct lu_env *env,
		      struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	io = cl_io_top(io);
	cl_page_owner_clear(pg);
	cl_page_state_set(env, pg, CPS_CACHED);
	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
			       (const struct lu_env *,
				const struct cl_page_slice *, struct cl_io *),
			       io);
}
EXPORT_SYMBOL(cl_page_unassume);

/**
 * Releases page ownership.
 *
 * Moves page into cl_page_state::CPS_CACHED.
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_own()
 * \see cl_page_operations::cpo_disown()
 */
void cl_page_disown(const struct lu_env *env,
		    struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
		pg->cp_state == CPS_FREEING);

	io = cl_io_top(io);
	cl_page_disown0(env, io, pg);
}
EXPORT_SYMBOL(cl_page_disown);

/**
 * Called when page is to be removed from the object, e.g., as a result of
 * truncate.
 *
 * Calls cl_page_operations::cpo_discard() top-to-bottom.
 *
 * \pre cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_discard()
 */
void cl_page_discard(const struct lu_env *env,
		     struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
}
EXPORT_SYMBOL(cl_page_discard);

/**
 * Version of cl_page_delete() that can be called for not fully constructed
 * pages, e.g., in an error handling cl_page_find()->cl_page_delete0()
 * path. Doesn't check page invariant.
 */
static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
{
	PASSERT(env, pg, pg->cp_state != CPS_FREEING);

	/*
	 * Sever all ways to obtain new pointers to @pg.
	 */
	cl_page_owner_clear(pg);

	cl_page_state_set0(env, pg, CPS_FREEING);

	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
			       (const struct lu_env *,
				const struct cl_page_slice *));
}

/**
 * Called when a decision is made to throw page out of memory.
 *
 * Notifies all layers about page destruction by calling
 * cl_page_operations::cpo_delete() method top-to-bottom.
 *
 * Moves page into cl_page_state::CPS_FREEING state (this is the only place
 * where transition to this state happens).
 *
 * Eliminates all venues through which new references to the page can be
 * obtained:
 *
 *     - removes page from the radix trees,
 *
 *     - breaks linkage from VM page to cl_page.
 *
 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
 * drain after some time, at which point page will be recycled.
 *
 * \pre  VM page is locked
 * \post pg->cp_state == CPS_FREEING
 *
 * \see cl_page_operations::cpo_delete()
 */
void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_invariant(pg));
	cl_page_delete0(env, pg);
}
EXPORT_SYMBOL(cl_page_delete);

/**
 * Marks page up-to-date.
 *
 * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
 * layer responsible for VM interaction has to mark/clear the page as
 * up-to-date according to the \a uptodate argument.
 *
 * \see cl_page_operations::cpo_export()
 */
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
{
	PINVRNT(env, pg, cl_page_invariant(pg));
	CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
		       (const struct lu_env *,
			const struct cl_page_slice *, int), uptodate);
}
EXPORT_SYMBOL(cl_page_export);

/**
 * Returns true iff \a pg is VM-locked in a suitable sense by the calling
 * thread.
 */
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
{
	int result;
	const struct cl_page_slice *slice;

	slice = container_of(pg->cp_layers.next,
			     const struct cl_page_slice, cpl_linkage);
	PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
	/*
	 * Call ->cpo_is_vmlocked() directly instead of going through
	 * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
	 * cl_page_invariant().
	 */
	result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
	PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
	return result == -EBUSY;
}
EXPORT_SYMBOL(cl_page_is_vmlocked);

static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
{
	return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
}

static void cl_page_io_start(const struct lu_env *env,
			     struct cl_page *pg, enum cl_req_type crt)
{
	/*
	 * Page is queued for IO, change its state.
	 */
	cl_page_owner_clear(pg);
	cl_page_state_set(env, pg, cl_req_type_state(crt));
}

/**
 * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
 * called top-to-bottom. Every layer either agrees to submit this page (by
 * returning 0), or requests to omit this page (by returning -EALREADY). The
 * layer handling interactions with the VM also has to inform the VM that the
 * page is under transfer now.
 */
int cl_page_prep(const struct lu_env *env, struct cl_io *io,
		 struct cl_page *pg, enum cl_req_type crt)
{
	int result;

	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));
	PINVRNT(env, pg, crt < CRT_NR);

	/*
	 * XXX this has to be called bottom-to-top, so that llite can set up
	 * PG_writeback without risking other layers deciding to skip this
	 * page.
	 */
	if (crt >= CRT_NR)
		return -EINVAL;
	result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
	if (result == 0)
		cl_page_io_start(env, pg, crt);

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
	return result;
}
EXPORT_SYMBOL(cl_page_prep);
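
/*
 * Illustrative transfer sketch (abbreviated; env/io/page come from the
 * caller): a page that the IO owns is handed to the transfer engine and
 * returns to the cache when the transfer completes.
 *
 *	if (cl_page_prep(env, io, page, CRT_WRITE) == 0) {
 *		... page is now CPS_PAGEOUT and queued for transfer;
 *		    the transfer sub-system later calls
 *		    cl_page_completion(env, page, CRT_WRITE, ioret),
 *		    which moves it back to CPS_CACHED ...
 *	} else {
 *		... a negative result (e.g. -EALREADY) means some layer
 *		    asked to skip this page ...
 *	}
 */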

/**
 * Notify layers about transfer completion.
 *
 * Invoked by transfer sub-system (which is a part of osc) to notify layers
 * that a transfer, of which this page is a part, has completed.
 *
 * Completion call-backs are executed in the bottom-up order, so that the
 * uppermost layer (llite), responsible for the VFS/VM interaction, runs last
 * and can release locks safely.
 *
 * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 * \post pg->cp_state == CPS_CACHED
 *
 * \see cl_page_operations::cpo_completion()
 */
void cl_page_completion(const struct lu_env *env,
			struct cl_page *pg, enum cl_req_type crt, int ioret)
{
	struct cl_sync_io *anchor = pg->cp_sync_io;

	PASSERT(env, pg, crt < CRT_NR);
	/* cl_page::cp_req already cleared by the caller (osc_completion()) */
	PASSERT(env, pg, !pg->cp_req);
	PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);

	cl_page_state_set(env, pg, CPS_CACHED);
	if (crt >= CRT_NR)
		return;
	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
			       (const struct lu_env *,
				const struct cl_page_slice *, int), ioret);
	if (anchor) {
		LASSERT(cl_page_is_vmlocked(env, pg));
		LASSERT(pg->cp_sync_io == anchor);
		pg->cp_sync_io = NULL;
	}
	/*
	 * As page->cp_obj is pinned by a reference from page->cp_req, it is
	 * safe to call cl_page_put() without risking object destruction in a
	 * non-blocking context.
	 */
	cl_page_put(env, pg);

	if (anchor)
		cl_sync_io_note(env, anchor, ioret);
}
EXPORT_SYMBOL(cl_page_completion);

/**
 * Notify layers that the transfer formation engine has decided to yank this
 * page from the cache and make it part of a transfer.
 *
 * \pre  pg->cp_state == CPS_CACHED
 * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 *
 * \see cl_page_operations::cpo_make_ready()
 */
int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
		       enum cl_req_type crt)
{
	int result;

	PINVRNT(env, pg, crt < CRT_NR);

	if (crt >= CRT_NR)
		return -EINVAL;
	result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
				(const struct lu_env *,
				 const struct cl_page_slice *));
	if (result == 0) {
		PASSERT(env, pg, pg->cp_state == CPS_CACHED);
		cl_page_io_start(env, pg, crt);
	}
	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
	return result;
}
EXPORT_SYMBOL(cl_page_make_ready);

/**
 * Called if a page is being written back on the kernel's initiative.
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
 *
 * \see cl_page_operations::cpo_flush()
 */
int cl_page_flush(const struct lu_env *env, struct cl_io *io,
		  struct cl_page *pg)
{
	int result;

	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
	return result;
}
EXPORT_SYMBOL(cl_page_flush);

/**
 * Checks whether the page is protected by any extent lock in at least the
 * required mode.
 *
 * \return the same as in cl_page_operations::cpo_is_under_lock() method.
 * \see cl_page_operations::cpo_is_under_lock()
 */
int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
			  struct cl_page *page, pgoff_t *max_index)
{
	int rc;

	PINVRNT(env, page, cl_page_invariant(page));

	rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
				    (const struct lu_env *,
				     const struct cl_page_slice *,
				     struct cl_io *, pgoff_t *),
				    io, max_index);
	return rc;
}
EXPORT_SYMBOL(cl_page_is_under_lock);

/**
 * Tells transfer engine that only part of a page is to be transmitted.
 *
 * \see cl_page_operations::cpo_clip()
 */
void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
		  int from, int to)
{
	PINVRNT(env, pg, cl_page_invariant(pg));

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
	CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
		       (const struct lu_env *,
			const struct cl_page_slice *, int, int),
		       from, to);
}
EXPORT_SYMBOL(cl_page_clip);

/**
 * Prints human readable representation of \a pg to the \a printer.
 */
void cl_page_header_print(const struct lu_env *env, void *cookie,
			  lu_printer_t printer, const struct cl_page *pg)
{
	(*printer)(env, cookie,
		   "page@%p[%d %p %d %d %p %p]\n",
		   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
		   pg->cp_state, pg->cp_type,
		   pg->cp_owner, pg->cp_req);
}
EXPORT_SYMBOL(cl_page_header_print);

/**
 * Prints human readable representation of \a pg to the \a printer.
 */
void cl_page_print(const struct lu_env *env, void *cookie,
		   lu_printer_t printer, const struct cl_page *pg)
{
	cl_page_header_print(env, cookie, printer, pg);
	CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
		       (const struct lu_env *env,
			const struct cl_page_slice *slice,
			void *cookie, lu_printer_t p), cookie, printer);
	(*printer)(env, cookie, "end page@%p\n", pg);
}
EXPORT_SYMBOL(cl_page_print);

/**
 * Cancel a page which is still in a transfer.
 */
int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
{
	return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
			      (const struct lu_env *,
			       const struct cl_page_slice *));
}
EXPORT_SYMBOL(cl_page_cancel);

/**
 * Converts a page index within object \a obj into a byte offset.
 */
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
{
	/*
	 * XXX for now.
	 */
	return (loff_t)idx << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_offset);

/**
 * Converts a byte offset within object \a obj into a page index.
 */
pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
{
	/*
	 * XXX for now.
	 */
	return offset >> PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_index);

int cl_page_size(const struct cl_object *obj)
{
	return 1 << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_page_size);
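
/*
 * Quick sanity sketch (assuming the common 4 KiB PAGE_SIZE, i.e.
 * PAGE_SHIFT == 12): cl_offset(obj, 3) yields 3 << 12 == 12288, and
 * cl_index(obj, 12288 + 100) truncates back to page index 3, so the two
 * helpers round-trip for any offset inside the page.
 */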

/**
 * Adds page slice to the compound page.
 *
 * This is called by cl_object_operations::coo_page_init() methods to add a
 * per-layer state to the page. New state is added at the end of
 * cl_page::cp_layers list, that is, it is at the bottom of the stack.
 *
 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
 */
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
		       struct cl_object *obj, pgoff_t index,
		       const struct cl_page_operations *ops)
{
	list_add_tail(&slice->cpl_linkage, &page->cp_layers);
	slice->cpl_obj = obj;
	slice->cpl_index = index;
	slice->cpl_ops = ops;
	slice->cpl_page = page;
}
EXPORT_SYMBOL(cl_page_slice_add);
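
/*
 * Illustrative sketch (hypothetical layer): a cl_object_operations::
 * coo_page_init() method typically embeds its slice in a per-layer page
 * structure and registers it with cl_page_slice_add():
 *
 *	static int foo_page_init(const struct lu_env *env,
 *				 struct cl_object *obj,
 *				 struct cl_page *page, pgoff_t index)
 *	{
 *		struct foo_page *fp = foo_page_slice(obj, page);
 *
 *		cl_page_slice_add(page, &fp->fp_cl, obj, index, &foo_page_ops);
 *		return 0;
 *	}
 *
 * "foo_page", "foo_page_slice", "fp_cl" and "foo_page_ops" are placeholders;
 * real layers (vvp, lov, osc) each provide their own equivalents.
 */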

/**
 * Allocate and initialize cl_cache, called by ll_init_sbi().
 */
struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
{
	struct cl_client_cache *cache = NULL;

	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
	if (!cache)
		return NULL;

	/* Initialize cache data */
	atomic_set(&cache->ccc_users, 1);
	cache->ccc_lru_max = lru_page_max;
	atomic_set(&cache->ccc_lru_left, lru_page_max);
	spin_lock_init(&cache->ccc_lru_lock);
	INIT_LIST_HEAD(&cache->ccc_lru);

	atomic_set(&cache->ccc_unstable_nr, 0);
	init_waitqueue_head(&cache->ccc_unstable_waitq);

	return cache;
}
EXPORT_SYMBOL(cl_cache_init);

/**
 * Increase cl_cache refcount
 */
void cl_cache_incref(struct cl_client_cache *cache)
{
	atomic_inc(&cache->ccc_users);
}
EXPORT_SYMBOL(cl_cache_incref);

/**
 * Decrease cl_cache refcount and free the cache if refcount=0.
 * Since llite, lov and osc all hold cl_cache refcount,
 * the free will not cause a race. (LU-6173)
 */
void cl_cache_decref(struct cl_client_cache *cache)
{
	if (atomic_dec_and_test(&cache->ccc_users))
		kfree(cache);
}
EXPORT_SYMBOL(cl_cache_decref);
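
/*
 * Lifecycle sketch (illustrative; the lru_page_max value is arbitrary): the
 * cache is created once per mount, additional users take a reference, and
 * the last cl_cache_decref() frees it.
 *
 *	struct cl_client_cache *cache;
 *
 *	cache = cl_cache_init(1024);
 *	if (!cache)
 *		return -ENOMEM;
 *	cl_cache_incref(cache);		(another layer starts sharing it)
 *	...
 *	cl_cache_decref(cache);		(that layer drops its reference)
 *	cl_cache_decref(cache);		(last reference, cache is freed)
 */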