/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Client Lustre Page.
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 */
#define DEBUG_SUBSYSTEM S_CLASS

#include "../../include/linux/libcfs/libcfs.h"
#include "../include/obd_class.h"
#include "../include/obd_support.h"
#include <linux/list.h>

#include "../include/cl_object.h"
#include "cl_internal.h"
static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
			    int radix);
# define PASSERT(env, page, expr)					\
	do {								\
		if (unlikely(!(expr))) {				\
			CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
			LASSERT(0);					\
		}							\
	} while (0)
# define PINVRNT(env, page, exp) \
	((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
/**
 * Internal version of cl_page_top(); it should only be called when the page
 * is known not to be freed, e.g., with the page referenced, the radix tree
 * lock held, or the page owned.
 */
static struct cl_page *cl_page_top_trusted(struct cl_page *page)
{
	while (page->cp_parent)
		page = page->cp_parent;
	return page;
}
/**
 * Internal version of cl_page_get().
 *
 * This function can be used to obtain initial reference to previously
 * unreferenced cached object. It can be called only if concurrent page
 * reclamation is somehow prevented, e.g., by locking page radix-tree
 * (cl_object_header::coh_page_guard), or by keeping a lock on a VM page,
 * associated with \a page.
 *
 * Use with care! Not exported.
 */
static void cl_page_get_trust(struct cl_page *page)
{
	LASSERT(atomic_read(&page->cp_ref) > 0);
	atomic_inc(&page->cp_ref);
}
/**
 * Returns a slice within a page, corresponding to the given layer in the
 * device stack.
 *
 * \see cl_lock_at()
 */
static const struct cl_page_slice *
cl_page_at_trusted(const struct cl_page *page,
		   const struct lu_device_type *dtype)
{
	const struct cl_page_slice *slice;

	page = cl_page_top_trusted((struct cl_page *)page);
	do {
		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
			if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
				return slice;
		}
		page = page->cp_child;
	} while (page);
	return NULL;
}
/**
 * Returns a page with given index in the given object, or NULL if no page is
 * found. Acquires a reference on \a page.
 *
 * Locking: called under cl_object_header::coh_page_guard spin-lock.
 */
struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
{
	struct cl_page *page;

	assert_spin_locked(&hdr->coh_page_guard);

	page = radix_tree_lookup(&hdr->coh_tree, index);
	if (page)
		cl_page_get_trust(page);
	return page;
}
EXPORT_SYMBOL(cl_page_lookup);
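/*
 * Usage sketch (illustrative only; the helper name below is hypothetical):
 * cl_page_lookup() must run under the header's coh_page_guard, and a
 * non-NULL result carries a reference the caller later drops:
 *
 *	static struct cl_page *example_lookup(struct cl_object_header *hdr,
 *					      pgoff_t index)
 *	{
 *		struct cl_page *page;
 *
 *		spin_lock(&hdr->coh_page_guard);
 *		page = cl_page_lookup(hdr, index);
 *		spin_unlock(&hdr->coh_page_guard);
 *		return page;	(release with cl_page_put() when done)
 *	}
 */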
/**
 * Returns a list of pages by a given [start, end] of \a obj.
 *
 * \param resched If not NULL, then we give up before hogging CPU for too
 * long and set *resched = 1, in that case caller should implement a retry
 * logic.
 *
 * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
 * crucial in the face of [offset, EOF] locks.
 *
 * Return at least one page in @queue unless there is no covered page.
 */
int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
			struct cl_io *io, pgoff_t start, pgoff_t end,
			cl_page_gang_cb_t cb, void *cbdata)
{
	struct cl_object_header *hdr;
	struct cl_page *page;
	struct cl_page **pvec;
	const struct cl_page_slice *slice;
	const struct lu_device_type *dtype;
	pgoff_t idx;
	unsigned int nr;
	unsigned int i;
	unsigned int j;
	int res = CLP_GANG_OKAY;
	int tree_lock = 1;

	idx = start;
	hdr = cl_object_header(obj);
	pvec = cl_env_info(env)->clt_pvec;
	dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
	spin_lock(&hdr->coh_page_guard);
	while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
					    idx, CLT_PVEC_SIZE)) > 0) {
		int end_of_region = 0;

		idx = pvec[nr - 1]->cp_index + 1;
		for (i = 0, j = 0; i < nr; ++i) {
			page = pvec[i];
			pvec[i] = NULL;

			LASSERT(page->cp_type == CPT_CACHEABLE);
			if (page->cp_index > end) {
				end_of_region = 1;
				break;
			}
			if (page->cp_state == CPS_FREEING)
				continue;

			slice = cl_page_at_trusted(page, dtype);
			/*
			 * Pages for lsm-less file have no underneath sub-page
			 * for osc, in case of ...
			 */
			PASSERT(env, page, slice);

			page = slice->cpl_page;
			/*
			 * Can safely call cl_page_get_trust() under
			 * radix-tree spin-lock.
			 *
			 * XXX not true, because @page is from object another
			 * than @hdr and protected by different tree lock.
			 */
			cl_page_get_trust(page);
			lu_ref_add_atomic(&page->cp_reference,
					  "gang_lookup", current);
			pvec[j++] = page;
		}

		/*
		 * Here a delicate locking dance is performed. Current thread
		 * holds a reference to a page, but has to own it before it
		 * can be placed into queue. Owning implies waiting, so
		 * radix-tree lock is to be released. After a wait one has to
		 * check that pages weren't truncated (cl_page_own() returns
		 * error in the latter case).
		 */
		spin_unlock(&hdr->coh_page_guard);
		tree_lock = 0;

		for (i = 0; i < j; ++i) {
			page = pvec[i];
			if (res == CLP_GANG_OKAY)
				res = (*cb)(env, io, page, cbdata);
			lu_ref_del(&page->cp_reference,
				   "gang_lookup", current);
			cl_page_put(env, page);
		}
		if (nr < CLT_PVEC_SIZE || end_of_region)
			break;

		if (res == CLP_GANG_OKAY && need_resched())
			res = CLP_GANG_RESCHED;
		if (res != CLP_GANG_OKAY)
			break;

		spin_lock(&hdr->coh_page_guard);
		tree_lock = 1;
	}
	if (tree_lock)
		spin_unlock(&hdr->coh_page_guard);
	return res;
}
EXPORT_SYMBOL(cl_page_gang_lookup);
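/*
 * Usage sketch (illustrative only; callback and counter names are made up):
 * callers pass a cl_page_gang_cb_t and retry while CLP_GANG_RESCHED is
 * returned, the same pattern cl_pages_prune() uses with page_prune_cb():
 *
 *	static int count_cb(const struct lu_env *env, struct cl_io *io,
 *			    struct cl_page *page, void *cbdata)
 *	{
 *		(*(int *)cbdata)++;
 *		return CLP_GANG_OKAY;
 *	}
 *
 *	do {
 *		rc = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
 *					 count_cb, &count);
 *		if (rc == CLP_GANG_RESCHED)
 *			cond_resched();
 *	} while (rc != CLP_GANG_OKAY);
 */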
static void cl_page_free(const struct lu_env *env, struct cl_page *page)
{
	struct cl_object *obj = page->cp_obj;

	PASSERT(env, page, list_empty(&page->cp_batch));
	PASSERT(env, page, !page->cp_owner);
	PASSERT(env, page, !page->cp_req);
	PASSERT(env, page, !page->cp_parent);
	PASSERT(env, page, page->cp_state == CPS_FREEING);

	might_sleep();
	while (!list_empty(&page->cp_layers)) {
		struct cl_page_slice *slice;

		slice = list_entry(page->cp_layers.next,
				   struct cl_page_slice, cpl_linkage);
		list_del_init(page->cp_layers.next);
		slice->cpl_ops->cpo_fini(env, slice);
	}
	lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
	cl_object_put(env, obj);
	lu_ref_fini(&page->cp_reference);
	kfree(page);
}
/**
 * Helper function updating page state. This is the only place in the code
 * where cl_page::cp_state field is mutated.
 */
static inline void cl_page_state_set_trust(struct cl_page *page,
					   enum cl_page_state state)
{
	/* bypass const. */
	*(enum cl_page_state *)&page->cp_state = state;
}
static struct cl_page *cl_page_alloc(const struct lu_env *env,
				     struct cl_object *o, pgoff_t ind,
				     struct page *vmpage,
				     enum cl_page_type type)
{
	struct cl_page *page;
	struct lu_object_header *head;

	page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
	if (page) {
		int result = 0;

		atomic_set(&page->cp_ref, 1);
		if (type == CPT_CACHEABLE) /* for radix tree */
			atomic_inc(&page->cp_ref);
		page->cp_obj = o;
		cl_object_get(o);
		lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
				     page);
		page->cp_index = ind;
		cl_page_state_set_trust(page, CPS_CACHED);
		page->cp_type = type;
		INIT_LIST_HEAD(&page->cp_layers);
		INIT_LIST_HEAD(&page->cp_batch);
		INIT_LIST_HEAD(&page->cp_flight);
		mutex_init(&page->cp_mutex);
		lu_ref_init(&page->cp_reference);
		head = o->co_lu.lo_header;
		list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
			if (o->co_ops->coo_page_init) {
				result = o->co_ops->coo_page_init(env, o,
								  page, vmpage);
				if (result != 0) {
					cl_page_delete0(env, page, 0);
					cl_page_free(env, page);
					page = ERR_PTR(result);
					break;
				}
			}
		}
	} else {
		page = ERR_PTR(-ENOMEM);
	}
	return page;
}
/**
 * Returns a cl_page with index \a idx at the object \a o, and associated with
 * the VM page \a vmpage.
 *
 * This is the main entry point into the cl_page caching interface. First, a
 * cache (implemented as a per-object radix tree) is consulted. If page is
 * found there, it is returned immediately. Otherwise new page is allocated
 * and returned. In any case, additional reference to page is acquired.
 *
 * \see cl_object_find(), cl_lock_find()
 */
static struct cl_page *cl_page_find0(const struct lu_env *env,
				     struct cl_object *o,
				     pgoff_t idx, struct page *vmpage,
				     enum cl_page_type type,
				     struct cl_page *parent)
{
	struct cl_page *page = NULL;
	struct cl_page *ghost = NULL;
	struct cl_object_header *hdr;
	int err;

	LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
	might_sleep();

	hdr = cl_object_header(o);

	CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
	       idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
	/* fast path. */
	if (type == CPT_CACHEABLE) {
		/*
		 * vmpage lock is used to protect the child/parent
		 * relationship
		 */
		KLASSERT(PageLocked(vmpage));
		/*
		 * cl_vmpage_page() can be called here without any locks as
		 *
		 *     - "vmpage" is locked (which prevents ->private from
		 *       concurrent updates), and
		 *
		 *     - "o" cannot be destroyed while current thread holds a
		 *       reference on it.
		 */
		page = cl_vmpage_page(vmpage, o);
		PINVRNT(env, page,
			ergo(page,
			     cl_page_vmpage(env, page) == vmpage &&
			     (void *)radix_tree_lookup(&hdr->coh_tree,
						       idx) == page));
	}

	if (page)
		return page;

	/* allocate and initialize cl_page */
	page = cl_page_alloc(env, o, idx, vmpage, type);
	if (IS_ERR(page))
		return page;

	if (type == CPT_TRANSIENT) {
		if (parent) {
			LASSERT(!page->cp_parent);
			page->cp_parent = parent;
			parent->cp_child = page;
		}
		return page;
	}

	/*
	 * XXX optimization: use radix_tree_preload() here, and change tree
	 * gfp mask to GFP_KERNEL in cl_object_header_init().
	 */
	spin_lock(&hdr->coh_page_guard);
	err = radix_tree_insert(&hdr->coh_tree, idx, page);
	if (err != 0) {
		ghost = page;
		/*
		 * Noted by Jay: a lock on \a vmpage protects cl_page_find()
		 * from this race, but
		 *
		 *     0. it's better to have cl_page interface "locally
		 *     consistent" so that its correctness can be reasoned
		 *     about without appealing to the (obscure world of) VM
		 *     locking.
		 *
		 *     1. handling this race allows ->coh_tree to remain
		 *     consistent even when VM locking is somehow busted,
		 *     which is very useful during diagnosing and debugging.
		 */
		page = ERR_PTR(err);
		CL_PAGE_DEBUG(D_ERROR, env, ghost,
			      "fail to insert into radix tree: %d\n", err);
	} else {
		if (parent) {
			LASSERT(!page->cp_parent);
			page->cp_parent = parent;
			parent->cp_child = page;
		}
		hdr->coh_pages++;
	}
	spin_unlock(&hdr->coh_page_guard);

	if (unlikely(ghost)) {
		cl_page_delete0(env, ghost, 0);
		cl_page_free(env, ghost);
	}
	return page;
}
struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
			     pgoff_t idx, struct page *vmpage,
			     enum cl_page_type type)
{
	return cl_page_find0(env, o, idx, vmpage, type, NULL);
}
EXPORT_SYMBOL(cl_page_find);
struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
				 pgoff_t idx, struct page *vmpage,
				 struct cl_page *parent)
{
	return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
}
EXPORT_SYMBOL(cl_page_find_sub);
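/*
 * Usage sketch (illustrative only; variable names are assumed): a caller
 * holding the locked VM page and a reference on the object finds or creates
 * the corresponding cl_page, and always balances the reference it gets:
 *
 *	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	...
 *	cl_page_put(env, page);
 */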
static inline int cl_page_invariant(const struct cl_page *pg)
{
	struct cl_object_header *header;
	struct cl_page *parent;
	struct cl_page *child;
	struct cl_io *owner;

	/*
	 * Page invariant is protected by a VM lock.
	 */
	LINVRNT(cl_page_is_vmlocked(NULL, pg));

	header = cl_object_header(pg->cp_obj);
	parent = pg->cp_parent;
	child  = pg->cp_child;
	owner  = pg->cp_owner;

	return cl_page_in_use(pg) &&
		ergo(parent, parent->cp_child == pg) &&
		ergo(child, child->cp_parent == pg) &&
		ergo(child, pg->cp_obj != child->cp_obj) &&
		ergo(parent, pg->cp_obj != parent->cp_obj) &&
		ergo(owner && parent,
		     parent->cp_owner == pg->cp_owner->ci_parent) &&
		ergo(owner && child, child->cp_owner->ci_parent == owner) &&
		/*
		 * Either page is early in initialization (has neither child
		 * nor parent yet), or it is in the object radix tree.
		 */
		ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
		     (void *)radix_tree_lookup(&header->coh_tree,
					       pg->cp_index) == pg ||
		     (!child && !parent));
}
static void cl_page_state_set0(const struct lu_env *env,
			       struct cl_page *page, enum cl_page_state state)
{
	enum cl_page_state old;

	/*
	 * Matrix of allowed state transitions [old][new], for sanity
	 * checking.
	 */
	static const int allowed_transitions[CPS_NR][CPS_NR] = {
		[CPS_CACHED] = {
			[CPS_CACHED]  = 0,
			[CPS_OWNED]   = 1, /* io finds existing cached page */
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 1, /* write-out from the cache */
			[CPS_FREEING] = 1, /* eviction on the memory pressure */
		},
		[CPS_OWNED] = {
			[CPS_CACHED]  = 1, /* release to the cache */
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 1, /* start read immediately */
			[CPS_PAGEOUT] = 1, /* start write immediately */
			[CPS_FREEING] = 1, /* lock invalidation or truncate */
		},
		[CPS_PAGEIN] = {
			[CPS_CACHED]  = 1, /* io completion */
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 0,
			[CPS_FREEING] = 0,
		},
		[CPS_PAGEOUT] = {
			[CPS_CACHED]  = 1, /* io completion */
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 0,
			[CPS_FREEING] = 0,
		},
		[CPS_FREEING] = {
			[CPS_CACHED]  = 0,
			[CPS_OWNED]   = 0,
			[CPS_PAGEIN]  = 0,
			[CPS_PAGEOUT] = 0,
			[CPS_FREEING] = 0,
		}
	};

	old = page->cp_state;
	PASSERT(env, page, allowed_transitions[old][state]);
	CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
	for (; page; page = page->cp_child) {
		PASSERT(env, page, page->cp_state == old);
		PASSERT(env, page,
			equi(state == CPS_OWNED, page->cp_owner));

		cl_page_state_set_trust(page, state);
	}
}

static void cl_page_state_set(const struct lu_env *env,
			      struct cl_page *page, enum cl_page_state state)
{
	cl_page_state_set0(env, page, state);
}
/**
 * Acquires an additional reference to a page.
 *
 * This can be called only by caller already possessing a reference to \a
 * page.
 *
 * \see cl_object_get(), cl_lock_get().
 */
void cl_page_get(struct cl_page *page)
{
	cl_page_get_trust(page);
}
EXPORT_SYMBOL(cl_page_get);

/**
 * Releases a reference to a page.
 *
 * When last reference is released, page is returned to the cache, unless it
 * is in cl_page_state::CPS_FREEING state, in which case it is immediately
 * destroyed.
 *
 * \see cl_object_put(), cl_lock_put().
 */
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
	PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);

	CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
		       atomic_read(&page->cp_ref));

	if (atomic_dec_and_test(&page->cp_ref)) {
		LASSERT(page->cp_state == CPS_FREEING);

		LASSERT(atomic_read(&page->cp_ref) == 0);
		PASSERT(env, page, !page->cp_owner);
		PASSERT(env, page, list_empty(&page->cp_batch));
		/*
		 * Page is no longer reachable by other threads. Tear
		 * it down.
		 */
		cl_page_free(env, page);
	}
}
EXPORT_SYMBOL(cl_page_put);
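/*
 * Reference sketch (illustrative only): every lookup/find returns a
 * referenced page, cl_page_get() takes an extra reference for a caller that
 * already holds one, and each reference is balanced by cl_page_put():
 *
 *	cl_page_get(page);		(hand a reference to another context)
 *	...
 *	cl_page_put(env, page);		(frees the page once it is CPS_FREEING
 *					 and this was the last reference)
 */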
/**
 * Returns a VM page associated with a given cl_page.
 */
struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
{
	const struct cl_page_slice *slice;

	/*
	 * Find uppermost layer with ->cpo_vmpage() method, and return its
	 * result.
	 */
	page = cl_page_top(page);
	do {
		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
			if (slice->cpl_ops->cpo_vmpage)
				return slice->cpl_ops->cpo_vmpage(env, slice);
		}
		page = page->cp_child;
	} while (page);
	LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
}
EXPORT_SYMBOL(cl_page_vmpage);
/**
 * Returns a cl_page associated with a VM page, and given cl_object.
 */
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
{
	struct cl_page *top;
	struct cl_page *page;

	KLASSERT(PageLocked(vmpage));

	/*
	 * NOTE: absence of races and liveness of data are guaranteed by page
	 *       lock on a "vmpage". That works because object destruction has
	 *       bottom-to-top pass.
	 */

	/*
	 * This loop assumes that ->private points to the top-most page. This
	 * can be rectified easily.
	 */
	top = (struct cl_page *)vmpage->private;
	if (!top)
		return NULL;

	for (page = top; page; page = page->cp_child) {
		if (cl_object_same(page->cp_obj, obj)) {
			cl_page_get_trust(page);
			break;
		}
	}
	LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
	return page;
}
EXPORT_SYMBOL(cl_vmpage_page);
/**
 * Returns the top-page for a given page.
 *
 * \see cl_object_top(), cl_io_top()
 */
struct cl_page *cl_page_top(struct cl_page *page)
{
	return cl_page_top_trusted(page);
}
EXPORT_SYMBOL(cl_page_top);

const struct cl_page_slice *cl_page_at(const struct cl_page *page,
				       const struct lu_device_type *dtype)
{
	return cl_page_at_trusted(page, dtype);
}
EXPORT_SYMBOL(cl_page_at);
#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)

#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)			\
({									\
	const struct lu_env *__env = (_env);				\
	struct cl_page *__page = (_page);				\
	const struct cl_page_slice *__scan;				\
	int __result;							\
	ptrdiff_t __op = (_op);						\
	int (*__method)_proto;						\
									\
	__result = 0;							\
	__page = cl_page_top(__page);					\
	do {								\
		list_for_each_entry(__scan, &__page->cp_layers,		\
				    cpl_linkage) {			\
			__method = *(void **)((char *)__scan->cpl_ops +	\
					      __op);			\
			if (__method) {					\
				__result = (*__method)(__env, __scan,	\
						       ## __VA_ARGS__);	\
				if (__result != 0)			\
					break;				\
			}						\
		}							\
		__page = __page->cp_child;				\
	} while (__page && __result == 0);				\
	if (__result > 0)						\
		__result = 0;						\
	__result;							\
})

#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)			\
do {									\
	const struct lu_env *__env = (_env);				\
	struct cl_page *__page = (_page);				\
	const struct cl_page_slice *__scan;				\
	ptrdiff_t __op = (_op);						\
	void (*__method)_proto;						\
									\
	__page = cl_page_top(__page);					\
	do {								\
		list_for_each_entry(__scan, &__page->cp_layers,		\
				    cpl_linkage) {			\
			__method = *(void **)((char *)__scan->cpl_ops +	\
					      __op);			\
			if (__method)					\
				(*__method)(__env, __scan,		\
					    ## __VA_ARGS__);		\
		}							\
		__page = __page->cp_child;				\
	} while (__page);						\
} while (0)

#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)		\
do {									\
	const struct lu_env *__env = (_env);				\
	struct cl_page *__page = (_page);				\
	const struct cl_page_slice *__scan;				\
	ptrdiff_t __op = (_op);						\
	void (*__method)_proto;						\
									\
	/* get to the bottom page. */					\
	while (__page->cp_child)					\
		__page = __page->cp_child;				\
	do {								\
		list_for_each_entry_reverse(__scan, &__page->cp_layers,	\
					    cpl_linkage) {		\
			__method = *(void **)((char *)__scan->cpl_ops +	\
					      __op);			\
			if (__method)					\
				(*__method)(__env, __scan,		\
					    ## __VA_ARGS__);		\
		}							\
		__page = __page->cp_parent;				\
	} while (__page);						\
} while (0)
static int cl_page_invoke(const struct lu_env *env,
			  struct cl_io *io, struct cl_page *page, ptrdiff_t op)
{
	PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
	return CL_PAGE_INVOKE(env, page, op,
			      (const struct lu_env *,
			       const struct cl_page_slice *, struct cl_io *),
			      io);
}

static void cl_page_invoid(const struct lu_env *env,
			   struct cl_io *io, struct cl_page *page, ptrdiff_t op)
{
	PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
	CL_PAGE_INVOID(env, page, op,
		       (const struct lu_env *,
			const struct cl_page_slice *, struct cl_io *), io);
}
static void cl_page_owner_clear(struct cl_page *page)
{
	for (page = cl_page_top(page); page; page = page->cp_child) {
		if (page->cp_owner) {
			LASSERT(page->cp_owner->ci_owned_nr > 0);
			page->cp_owner->ci_owned_nr--;
			page->cp_owner = NULL;
			page->cp_task = NULL;
		}
	}
}

static void cl_page_owner_set(struct cl_page *page)
{
	for (page = cl_page_top(page); page; page = page->cp_child)
		page->cp_owner->ci_owned_nr++;
}
void cl_page_disown0(const struct lu_env *env,
		     struct cl_io *io, struct cl_page *pg)
{
	enum cl_page_state state;

	state = pg->cp_state;
	PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
	PINVRNT(env, pg, cl_page_invariant(pg));
	cl_page_owner_clear(pg);

	if (state == CPS_OWNED)
		cl_page_state_set(env, pg, CPS_CACHED);
	/*
	 * Completion call-backs are executed in the bottom-up order, so that
	 * uppermost layer (llite), responsible for VFS/VM interaction runs
	 * last and can release locks safely.
	 */
	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
			       (const struct lu_env *,
				const struct cl_page_slice *, struct cl_io *),
			       io);
}
/**
 * returns true, iff page is owned by the given io.
 */
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
{
	LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
	return pg->cp_state == CPS_OWNED && pg->cp_owner == io;
}
EXPORT_SYMBOL(cl_page_is_owned);
/**
 * Try to own a page by IO.
 *
 * Waits until page is in cl_page_state::CPS_CACHED state, and then switch it
 * into cl_page_state::CPS_OWNED state.
 *
 * \pre  !cl_page_is_owned(pg, io)
 * \post result == 0 iff cl_page_is_owned(pg, io)
 *
 * \retval 0   success
 *
 * \retval -ve failure, e.g., page was destroyed (and landed in
 *	     cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
 *	     or, page was owned by another thread, or in IO.
 *
 * \see cl_page_disown()
 * \see cl_page_operations::cpo_own()
 * \see cl_page_own_try()
 */
static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
			struct cl_page *pg, int nonblock)
{
	int result;

	PINVRNT(env, pg, !cl_page_is_owned(pg, io));

	pg = cl_page_top(pg);
	io = cl_io_top(io);

	if (pg->cp_state == CPS_FREEING) {
		result = -ENOENT;
	} else {
		result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
					(const struct lu_env *,
					 const struct cl_page_slice *,
					 struct cl_io *, int),
					io, nonblock);
		if (result == 0) {
			PASSERT(env, pg, !pg->cp_owner);
			PASSERT(env, pg, !pg->cp_req);
			pg->cp_owner = io;
			pg->cp_task = current;
			cl_page_owner_set(pg);
			if (pg->cp_state != CPS_FREEING) {
				cl_page_state_set(env, pg, CPS_OWNED);
			} else {
				cl_page_disown0(env, io, pg);
				result = -ENOENT;
			}
		}
	}
	PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
	return result;
}
/**
 * Own a page, might be blocked.
 *
 * \see cl_page_own0()
 */
int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
{
	return cl_page_own0(env, io, pg, 0);
}
EXPORT_SYMBOL(cl_page_own);

/**
 * Nonblock version of cl_page_own().
 *
 * \see cl_page_own0()
 */
int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
		    struct cl_page *pg)
{
	return cl_page_own0(env, io, pg, 1);
}
EXPORT_SYMBOL(cl_page_own_try);
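/*
 * Ownership sketch (illustrative only): an io owns a page around any state
 * change, using either the blocking or the non-blocking variant:
 *
 *	rc = nonblock ? cl_page_own_try(env, io, pg) : cl_page_own(env, io, pg);
 *	if (rc == 0) {
 *		...	(page is CPS_OWNED here)
 *		cl_page_disown(env, io, pg);
 *	}
 *
 * A negative return means the page was freed or owned elsewhere; see the
 * \retval description of cl_page_own0() above.
 */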
/**
 * Assume page ownership.
 *
 * Called when page is already locked by the hosting VM.
 *
 * \pre !cl_page_is_owned(pg, io)
 * \post cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_assume()
 */
void cl_page_assume(const struct lu_env *env,
		    struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));

	pg = cl_page_top(pg);
	io = cl_io_top(io);

	cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
	PASSERT(env, pg, !pg->cp_owner);
	pg->cp_owner = io;
	pg->cp_task = current;
	cl_page_owner_set(pg);
	cl_page_state_set(env, pg, CPS_OWNED);
}
EXPORT_SYMBOL(cl_page_assume);
/**
 * Releases page ownership without unlocking the page.
 *
 * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
 * underlying VM page (as VM is supposed to do this itself).
 *
 * \pre   cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_assume()
 */
void cl_page_unassume(const struct lu_env *env,
		      struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	pg = cl_page_top(pg);
	io = cl_io_top(io);
	cl_page_owner_clear(pg);
	cl_page_state_set(env, pg, CPS_CACHED);
	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
			       (const struct lu_env *,
				const struct cl_page_slice *, struct cl_io *),
			       io);
}
EXPORT_SYMBOL(cl_page_unassume);
/**
 * Releases page ownership.
 *
 * Moves page into cl_page_state::CPS_CACHED.
 *
 * \pre   cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_own()
 * \see cl_page_operations::cpo_disown()
 */
void cl_page_disown(const struct lu_env *env,
		    struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_is_owned(pg, io));

	pg = cl_page_top(pg);
	io = cl_io_top(io);
	cl_page_disown0(env, io, pg);
}
EXPORT_SYMBOL(cl_page_disown);
/**
 * Called when page is to be removed from the object, e.g., as a result of
 * truncate.
 *
 * Calls cl_page_operations::cpo_discard() top-to-bottom.
 *
 * \pre cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_discard()
 */
void cl_page_discard(const struct lu_env *env,
		     struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
}
EXPORT_SYMBOL(cl_page_discard);
/**
 * Version of cl_page_delete() that can be called for not fully constructed
 * pages, e.g., in an error handling cl_page_find()->cl_page_delete0()
 * path. Doesn't check page invariant.
 */
static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
			    int radix)
{
	struct cl_page *tmp = pg;

	PASSERT(env, pg, pg == cl_page_top(pg));
	PASSERT(env, pg, pg->cp_state != CPS_FREEING);

	/*
	 * Sever all ways to obtain new pointers to @pg.
	 */
	cl_page_owner_clear(pg);

	/*
	 * unexport the page firstly before freeing it so that
	 * the page content is considered to be invalid.
	 * We have to do this because a CPS_FREEING cl_page may
	 * be NOT under the protection of a cl_lock.
	 * Afterwards, if this page is found by other threads, then this
	 * page will be forced to reread.
	 */
	cl_page_export(env, pg, 0);
	cl_page_state_set0(env, pg, CPS_FREEING);

	CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
		       (const struct lu_env *, const struct cl_page_slice *));

	if (tmp->cp_type == CPT_CACHEABLE) {
		if (!radix)
			/* !radix means that @pg is not yet in the radix tree,
			 * skip removing it.
			 */
			tmp = pg->cp_child;
		for (; tmp; tmp = tmp->cp_child) {
			void *value;
			struct cl_object_header *hdr;

			hdr = cl_object_header(tmp->cp_obj);
			spin_lock(&hdr->coh_page_guard);
			value = radix_tree_delete(&hdr->coh_tree,
						  tmp->cp_index);
			PASSERT(env, tmp, value == tmp);
			PASSERT(env, tmp, hdr->coh_pages > 0);
			hdr->coh_pages--;
			spin_unlock(&hdr->coh_page_guard);
			cl_page_put(env, tmp);
		}
	}
}
/**
 * Called when a decision is made to throw page out of memory.
 *
 * Notifies all layers about page destruction by calling
 * cl_page_operations::cpo_delete() method top-to-bottom.
 *
 * Moves page into cl_page_state::CPS_FREEING state (this is the only place
 * where transition to this state happens).
 *
 * Eliminates all venues through which new references to the page can be
 * obtained:
 *
 *     - removes page from the radix trees,
 *
 *     - breaks linkage from VM page to cl_page.
 *
 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
 * drain after some time, at which point page will be recycled.
 *
 * \pre  pg == cl_page_top(pg)
 * \pre  VM page is locked
 * \post pg->cp_state == CPS_FREEING
 *
 * \see cl_page_operations::cpo_delete()
 */
void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_invariant(pg));
	cl_page_delete0(env, pg, 1);
}
EXPORT_SYMBOL(cl_page_delete);
/**
 * Unmaps page from user virtual memory.
 *
 * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
 * layer responsible for VM interaction has to unmap page from user space
 * virtual memory.
 *
 * \see cl_page_operations::cpo_unmap()
 */
int cl_page_unmap(const struct lu_env *env,
		  struct cl_io *io, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
}
EXPORT_SYMBOL(cl_page_unmap);
/**
 * Marks page up-to-date.
 *
 * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
 * layer responsible for VM interaction has to mark/clear page as up-to-date
 * by the \a uptodate argument.
 *
 * \see cl_page_operations::cpo_export()
 */
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
{
	PINVRNT(env, pg, cl_page_invariant(pg));
	CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
		       (const struct lu_env *,
			const struct cl_page_slice *, int), uptodate);
}
EXPORT_SYMBOL(cl_page_export);
/**
 * Returns true, iff \a pg is VM locked in a suitable sense by the calling
 * thread.
 */
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
{
	int result;
	const struct cl_page_slice *slice;

	pg = cl_page_top_trusted((struct cl_page *)pg);
	slice = container_of(pg->cp_layers.next,
			     const struct cl_page_slice, cpl_linkage);
	PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
	/*
	 * Call ->cpo_is_vmlocked() directly instead of going through
	 * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
	 * cl_page_invariant().
	 */
	result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
	PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
	return result == -EBUSY;
}
EXPORT_SYMBOL(cl_page_is_vmlocked);
static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
{
	return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
}

static void cl_page_io_start(const struct lu_env *env,
			     struct cl_page *pg, enum cl_req_type crt)
{
	/*
	 * Page is queued for IO, change its state.
	 */
	cl_page_owner_clear(pg);
	cl_page_state_set(env, pg, cl_req_type_state(crt));
}
/**
 * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
 * called top-to-bottom. Every layer either agrees to submit this page (by
 * returning 0), or requests to omit this page (by returning -EALREADY). Layer
 * handling interactions with the VM also has to inform VM that page is under
 * transfer now.
 */
int cl_page_prep(const struct lu_env *env, struct cl_io *io,
		 struct cl_page *pg, enum cl_req_type crt)
{
	int result;

	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));
	PINVRNT(env, pg, crt < CRT_NR);

	/*
	 * XXX this has to be called bottom-to-top, so that llite can set up
	 * PG_writeback without risking other layers deciding to skip this
	 * page.
	 */
	if (crt >= CRT_NR)
		return -EINVAL;
	result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
	if (result == 0)
		cl_page_io_start(env, pg, crt);

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
	return result;
}
EXPORT_SYMBOL(cl_page_prep);
/**
 * Notify layers about transfer completion.
 *
 * Invoked by transfer sub-system (which is a part of osc) to notify layers
 * that a transfer, of which this page is a part of has completed.
 *
 * Completion call-backs are executed in the bottom-up order, so that
 * uppermost layer (llite), responsible for the VFS/VM interaction runs last
 * and can release locks safely.
 *
 * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 * \post pg->cp_state == CPS_CACHED
 *
 * \see cl_page_operations::cpo_completion()
 */
void cl_page_completion(const struct lu_env *env,
			struct cl_page *pg, enum cl_req_type crt, int ioret)
{
	struct cl_sync_io *anchor = pg->cp_sync_io;

	PASSERT(env, pg, crt < CRT_NR);
	/* cl_page::cp_req already cleared by the caller (osc_completion()) */
	PASSERT(env, pg, !pg->cp_req);
	PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
	if (crt == CRT_READ && ioret == 0) {
		PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
		pg->cp_flags |= CPF_READ_COMPLETED;
	}

	cl_page_state_set(env, pg, CPS_CACHED);
	if (crt >= CRT_NR)
		return;
	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
			       (const struct lu_env *,
				const struct cl_page_slice *, int), ioret);
	if (anchor) {
		LASSERT(cl_page_is_vmlocked(env, pg));
		LASSERT(pg->cp_sync_io == anchor);
		pg->cp_sync_io = NULL;
	}
	/*
	 * As page->cp_obj is pinned by a reference from page->cp_req, it is
	 * safe to call cl_page_put() without risking object destruction in a
	 * non-blocking context.
	 */
	cl_page_put(env, pg);

	if (anchor)
		cl_sync_io_note(anchor, ioret);
}
EXPORT_SYMBOL(cl_page_completion);
/**
 * Notify layers that transfer formation engine decided to yank this page from
 * the cache and to make it a part of a transfer.
 *
 * \pre  pg->cp_state == CPS_CACHED
 * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 *
 * \see cl_page_operations::cpo_make_ready()
 */
int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
		       enum cl_req_type crt)
{
	int result;

	PINVRNT(env, pg, crt < CRT_NR);

	if (crt >= CRT_NR)
		return -EINVAL;
	result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
				(const struct lu_env *,
				 const struct cl_page_slice *));
	if (result == 0) {
		PASSERT(env, pg, pg->cp_state == CPS_CACHED);
		cl_page_io_start(env, pg, crt);
	}
	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
	return result;
}
EXPORT_SYMBOL(cl_page_make_ready);
/**
 * Notify layers that high level io decided to place this page into a cache
 * for future transfer.
 *
 * The layer implementing transfer engine (osc) has to register this page in
 * its queues.
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_cache_add()
 */
int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
		      struct cl_page *pg, enum cl_req_type crt)
{
	const struct cl_page_slice *scan;
	int result = 0;

	PINVRNT(env, pg, crt < CRT_NR);
	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	if (crt >= CRT_NR)
		return -EINVAL;

	list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
		if (!scan->cpl_ops->io[crt].cpo_cache_add)
			continue;

		result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
		if (result != 0)
			break;
	}
	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
	return result;
}
EXPORT_SYMBOL(cl_page_cache_add);
/**
 * Called if a page is being written back by kernel's intention.
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
 *
 * \see cl_page_operations::cpo_flush()
 */
int cl_page_flush(const struct lu_env *env, struct cl_io *io,
		  struct cl_page *pg)
{
	int result;

	PINVRNT(env, pg, cl_page_is_owned(pg, io));
	PINVRNT(env, pg, cl_page_invariant(pg));

	result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
	return result;
}
EXPORT_SYMBOL(cl_page_flush);
/**
 * Checks whether page is protected by any extent lock of at least the
 * required mode.
 *
 * \return the same as in cl_page_operations::cpo_is_under_lock() method.
 * \see cl_page_operations::cpo_is_under_lock()
 */
int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
			  struct cl_page *page)
{
	int rc;

	PINVRNT(env, page, cl_page_invariant(page));

	rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
			    (const struct lu_env *,
			     const struct cl_page_slice *, struct cl_io *),
			    io);
	PASSERT(env, page, rc != 0);
	return rc;
}
EXPORT_SYMBOL(cl_page_is_under_lock);
static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
			 struct cl_page *page, void *cbdata)
{
	cl_page_own(env, io, page);
	cl_page_unmap(env, io, page);
	cl_page_discard(env, io, page);
	cl_page_disown(env, io, page);
	return CLP_GANG_OKAY;
}
/**
 * Purges all cached pages belonging to the object \a obj.
 */
int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
{
	struct cl_thread_info *info;
	struct cl_object *obj = cl_object_top(clobj);
	struct cl_io *io;
	int result;

	info = cl_env_info(env);
	io = &info->clt_io;

	/*
	 * initialize the io. This is ugly since we never do IO in this
	 * function, we just make cl_page_list functions happy. -jay
	 */
	io->ci_obj = obj;
	io->ci_ignore_layout = 1;
	result = cl_io_init(env, io, CIT_MISC, obj);
	if (result != 0) {
		cl_io_fini(env, io);
		return io->ci_result;
	}

	do {
		result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
					     page_prune_cb, NULL);
		if (result == CLP_GANG_RESCHED)
			cond_resched();
	} while (result != CLP_GANG_OKAY);

	cl_io_fini(env, io);
	return result;
}
EXPORT_SYMBOL(cl_pages_prune);
/**
 * Tells transfer engine that only part of a page is to be transmitted.
 *
 * \see cl_page_operations::cpo_clip()
 */
void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
		  int from, int to)
{
	PINVRNT(env, pg, cl_page_invariant(pg));

	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
	CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
		       (const struct lu_env *,
			const struct cl_page_slice *, int, int),
		       from, to);
}
EXPORT_SYMBOL(cl_page_clip);
/**
 * Prints human readable representation of \a pg to the \a f.
 */
void cl_page_header_print(const struct lu_env *env, void *cookie,
			  lu_printer_t printer, const struct cl_page *pg)
{
	(*printer)(env, cookie,
		   "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
		   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
		   pg->cp_index, pg->cp_parent, pg->cp_child,
		   pg->cp_state, pg->cp_error, pg->cp_type,
		   pg->cp_owner, pg->cp_req, pg->cp_flags);
}
EXPORT_SYMBOL(cl_page_header_print);
/**
 * Prints human readable representation of \a pg to the \a f.
 */
void cl_page_print(const struct lu_env *env, void *cookie,
		   lu_printer_t printer, const struct cl_page *pg)
{
	struct cl_page *scan;

	for (scan = cl_page_top((struct cl_page *)pg); scan;
	     scan = scan->cp_child)
		cl_page_header_print(env, cookie, printer, scan);
	CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
		       (const struct lu_env *env,
			const struct cl_page_slice *slice,
			void *cookie, lu_printer_t p), cookie, printer);
	(*printer)(env, cookie, "end page@%p\n", pg);
}
EXPORT_SYMBOL(cl_page_print);
/**
 * Cancel a page which is still in a transfer.
 */
int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
{
	return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
			      (const struct lu_env *,
			       const struct cl_page_slice *));
}
EXPORT_SYMBOL(cl_page_cancel);
/**
 * Converts a page index within object \a obj into a byte offset.
 */
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
{
	/*
	 * XXX for now.
	 */
	return (loff_t)idx << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_offset);

/**
 * Converts a byte offset within object \a obj into a page index.
 */
pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
{
	/*
	 * XXX for now.
	 */
	return offset >> PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_index);

int cl_page_size(const struct cl_object *obj)
{
	return 1 << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_page_size);
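/*
 * Worked example (illustrative only): on a system where PAGE_SHIFT is 12,
 * cl_offset(obj, 3) == 3 << 12 == 12288, cl_index(obj, 12288 + 100) == 3,
 * and cl_page_size(obj) == 4096, so cl_index(obj, cl_offset(obj, idx)) == idx.
 */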
/**
 * Adds page slice to the compound page.
 *
 * This is called by cl_object_operations::coo_page_init() methods to add a
 * per-layer state to the page. New state is added at the end of
 * cl_page::cp_layers list, that is, it is at the bottom of the stack.
 *
 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
 */
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
		       struct cl_object *obj,
		       const struct cl_page_operations *ops)
{
	list_add_tail(&slice->cpl_linkage, &page->cp_layers);
	slice->cpl_obj  = obj;
	slice->cpl_ops  = ops;
	slice->cpl_page = page;
}
EXPORT_SYMBOL(cl_page_slice_add);
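/*
 * Usage sketch (illustrative only; the "foo" layer and its types are made
 * up): a layer's cl_object_operations::coo_page_init() method typically
 * embeds its slice in layer-private per-page storage and registers it:
 *
 *	static int foo_page_init(const struct lu_env *env,
 *				 struct cl_object *obj,
 *				 struct cl_page *page, struct page *vmpage)
 *	{
 *		struct foo_page *fp = ...;	(layer-private slice storage)
 *
 *		cl_page_slice_add(page, &fp->fp_cl, obj, &foo_page_ops);
 *		return 0;
 *	}
 *
 * Each slice lands at the tail of cl_page::cp_layers, i.e. below the slices
 * of the layers above it in the device stack.
 */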
int cl_page_init(void)
{
	return 0;
}

void cl_page_fini(void)
{
}