1/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26/*
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2015, Intel Corporation.
31 */
32/*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * Client Lustre Page.
37 *
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
39 */
40
41#define DEBUG_SUBSYSTEM S_CLASS
42
43#include "../../include/linux/libcfs/libcfs.h"
44#include "../include/obd_class.h"
45#include "../include/obd_support.h"
46#include <linux/list.h>
47
48#include "../include/cl_object.h"
49#include "cl_internal.h"
50
51static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
52 int radix);
53
54# define PASSERT(env, page, expr) \
55 do { \
56 if (unlikely(!(expr))) { \
57 CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
58 LASSERT(0); \
59 } \
60 } while (0)
61
62# define PINVRNT(env, page, exp) \
63 ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
64
65/**
66 * Internal version of cl_page_top(); it should only be called when the page
67 * is known not to be freed: say, with the page referenced, the radix-tree
68 * lock held, or the page owned.
69 */
70static struct cl_page *cl_page_top_trusted(struct cl_page *page)
71{
72 while (page->cp_parent)
73 page = page->cp_parent;
74 return page;
75}
76
77/**
78 * Internal version of cl_page_get().
79 *
80 * This function can be used to obtain an initial reference to a previously
81 * unreferenced cached object. It can be called only if concurrent page
82 * reclamation is somehow prevented, e.g., by locking the page radix-tree
83 * (cl_object_header::coh_page_guard), or by keeping a lock on the VM page
84 * associated with \a page.
85 *
86 * Use with care! Not exported.
87 */
88static void cl_page_get_trust(struct cl_page *page)
89{
90 LASSERT(atomic_read(&page->cp_ref) > 0);
91 atomic_inc(&page->cp_ref);
92}
93
94/**
95 * Returns a slice within a page, corresponding to the given layer in the
96 * device stack.
97 *
98 * \see cl_lock_at()
99 */
100static const struct cl_page_slice *
101cl_page_at_trusted(const struct cl_page *page,
102 const struct lu_device_type *dtype)
103{
104 const struct cl_page_slice *slice;
105
106 page = cl_page_top_trusted((struct cl_page *)page);
107 do {
108 list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
109 if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
110 return slice;
111 }
112 page = page->cp_child;
113 } while (page);
114 return NULL;
115}
116
117/**
118 * Returns a page with given index in the given object, or NULL if no page is
119 * found. Acquires a reference on \a page.
120 *
121 * Locking: called under cl_object_header::coh_page_guard spin-lock.
122 */
123struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
124{
125 struct cl_page *page;
126
127 assert_spin_locked(&hdr->coh_page_guard);
128
129 page = radix_tree_lookup(&hdr->coh_tree, index);
130 if (page)
131 cl_page_get_trust(page);
132 return page;
133}
134EXPORT_SYMBOL(cl_page_lookup);
135
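/*
 * Usage sketch (illustrative addition, not part of the original driver):
 * looking up a cached page under the radix-tree spin-lock; cl_page_lookup()
 * takes a reference on success, which the caller drops with cl_page_put().
 * "hdr", "index" and "env" are assumed to come from a hypothetical caller.
 *
 *	struct cl_page *pg;
 *
 *	spin_lock(&hdr->coh_page_guard);
 *	pg = cl_page_lookup(hdr, index);
 *	spin_unlock(&hdr->coh_page_guard);
 *	if (pg) {
 *		... use the page ...
 *		cl_page_put(env, pg);
 *	}
 */
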
136/**
137 * Returns a list of pages by a given [start, end] of \a obj.
138 *
139 * If the lookup has to give up before hogging the CPU for too long, it
140 * returns CLP_GANG_RESCHED; in that case the caller should reschedule and
141 * retry (see cl_pages_prune()).
142 *
143 * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
144 * crucial in the face of [offset, EOF] locks.
145 *
146 * Return at least one page in @queue unless there is no covered page.
147 */
148int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
149 struct cl_io *io, pgoff_t start, pgoff_t end,
150 cl_page_gang_cb_t cb, void *cbdata)
151{
152 struct cl_object_header *hdr;
153 struct cl_page *page;
154 struct cl_page **pvec;
155 const struct cl_page_slice *slice;
156 const struct lu_device_type *dtype;
157 pgoff_t idx;
158 unsigned int nr;
159 unsigned int i;
160 unsigned int j;
161 int res = CLP_GANG_OKAY;
162 int tree_lock = 1;
163
164 idx = start;
165 hdr = cl_object_header(obj);
166 pvec = cl_env_info(env)->clt_pvec;
167 dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
168 spin_lock(&hdr->coh_page_guard);
169 while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
170 idx, CLT_PVEC_SIZE)) > 0) {
171 int end_of_region = 0;
172
173 idx = pvec[nr - 1]->cp_index + 1;
174 for (i = 0, j = 0; i < nr; ++i) {
175 page = pvec[i];
176 pvec[i] = NULL;
177
178 LASSERT(page->cp_type == CPT_CACHEABLE);
179 if (page->cp_index > end) {
180 end_of_region = 1;
181 break;
182 }
183 if (page->cp_state == CPS_FREEING)
184 continue;
185
186 slice = cl_page_at_trusted(page, dtype);
187 /*
188 * Pages for an lsm-less file have no underlying sub-page
189 * for osc, in case of ...
190 */
191 PASSERT(env, page, slice);
192
193 page = slice->cpl_page;
194 /*
195 * Can safely call cl_page_get_trust() under
196 * radix-tree spin-lock.
197 *
198 * XXX not true, because @page is from an object other
199 * than @hdr and is protected by a different tree lock.
200 */
201 cl_page_get_trust(page);
202 lu_ref_add_atomic(&page->cp_reference,
203 "gang_lookup", current);
204 pvec[j++] = page;
205 }
206
207 /*
208 * Here a delicate locking dance is performed. Current thread
209 * holds a reference to a page, but has to own it before it
210 * can be placed into queue. Owning implies waiting, so
211 * radix-tree lock is to be released. After a wait one has to
212 * check that pages weren't truncated (cl_page_own() returns
213 * an error in the latter case).
214 */
215 spin_unlock(&hdr->coh_page_guard);
216 tree_lock = 0;
217
218 for (i = 0; i < j; ++i) {
219 page = pvec[i];
220 if (res == CLP_GANG_OKAY)
221 res = (*cb)(env, io, page, cbdata);
222 lu_ref_del(&page->cp_reference,
223 "gang_lookup", current);
224 cl_page_put(env, page);
225 }
226 if (nr < CLT_PVEC_SIZE || end_of_region)
227 break;
228
229 if (res == CLP_GANG_OKAY && need_resched())
230 res = CLP_GANG_RESCHED;
231 if (res != CLP_GANG_OKAY)
232 break;
233
234 spin_lock(&hdr->coh_page_guard);
235 tree_lock = 1;
236 }
237 if (tree_lock)
238 spin_unlock(&hdr->coh_page_guard);
239 return res;
240}
241EXPORT_SYMBOL(cl_page_gang_lookup);
242
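/*
 * Usage sketch (illustrative addition, not part of the original driver):
 * walking all cached pages of an object with a callback and retrying when
 * the lookup yields the CPU. This mirrors cl_pages_prune()/page_prune_cb()
 * below; "my_cb", "obj" and "io" are hypothetical.
 *
 *	static int my_cb(const struct lu_env *env, struct cl_io *io,
 *			 struct cl_page *page, void *cbdata)
 *	{
 *		... per-page work; the page is referenced but not owned ...
 *		return CLP_GANG_OKAY;
 *	}
 *
 *	int res;
 *
 *	do {
 *		res = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
 *					  my_cb, NULL);
 *		if (res == CLP_GANG_RESCHED)
 *			cond_resched();
 *	} while (res != CLP_GANG_OKAY);
 */
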
243static void cl_page_free(const struct lu_env *env, struct cl_page *page)
244{
245 struct cl_object *obj = page->cp_obj;
246
247 PASSERT(env, page, list_empty(&page->cp_batch));
248 PASSERT(env, page, !page->cp_owner);
249 PASSERT(env, page, !page->cp_req);
250 PASSERT(env, page, !page->cp_parent);
251 PASSERT(env, page, page->cp_state == CPS_FREEING);
252
253 might_sleep();
254 while (!list_empty(&page->cp_layers)) {
255 struct cl_page_slice *slice;
256
257 slice = list_entry(page->cp_layers.next,
258 struct cl_page_slice, cpl_linkage);
259 list_del_init(page->cp_layers.next);
260 slice->cpl_ops->cpo_fini(env, slice);
261 }
262 lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
263 cl_object_put(env, obj);
264 lu_ref_fini(&page->cp_reference);
265 kfree(page);
266}
267
268/**
269 * Helper function updating page state. This is the only place in the code
270 * where cl_page::cp_state field is mutated.
271 */
272static inline void cl_page_state_set_trust(struct cl_page *page,
273 enum cl_page_state state)
274{
275 /* bypass const. */
276 *(enum cl_page_state *)&page->cp_state = state;
277}
278
279static struct cl_page *cl_page_alloc(const struct lu_env *env,
280 struct cl_object *o, pgoff_t ind,
281 struct page *vmpage,
282 enum cl_page_type type)
283{
284 struct cl_page *page;
285 struct lu_object_header *head;
286
287 page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
288 if (page) {
289 int result = 0;
290
291 atomic_set(&page->cp_ref, 1);
292 if (type == CPT_CACHEABLE) /* for radix tree */
293 atomic_inc(&page->cp_ref);
294 page->cp_obj = o;
295 cl_object_get(o);
296 lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
297 page);
298 page->cp_index = ind;
299 cl_page_state_set_trust(page, CPS_CACHED);
300 page->cp_type = type;
301 INIT_LIST_HEAD(&page->cp_layers);
302 INIT_LIST_HEAD(&page->cp_batch);
303 INIT_LIST_HEAD(&page->cp_flight);
304 mutex_init(&page->cp_mutex);
305 lu_ref_init(&page->cp_reference);
306 head = o->co_lu.lo_header;
307 list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
308 if (o->co_ops->coo_page_init) {
309 result = o->co_ops->coo_page_init(env, o,
310 page, vmpage);
311 if (result != 0) {
312 cl_page_delete0(env, page, 0);
313 cl_page_free(env, page);
314 page = ERR_PTR(result);
315 break;
316 }
317 }
318 }
319 } else {
320 page = ERR_PTR(-ENOMEM);
321 }
322 return page;
323}
324
325/**
326 * Returns a cl_page with index \a idx at the object \a o, and associated with
327 * the VM page \a vmpage.
328 *
329 * This is the main entry point into the cl_page caching interface. First, a
330 * cache (implemented as a per-object radix tree) is consulted. If the page is
331 * found there, it is returned immediately. Otherwise a new page is allocated
332 * and returned. In either case, an additional reference to the page is acquired.
333 *
334 * \see cl_object_find(), cl_lock_find()
335 */
336static struct cl_page *cl_page_find0(const struct lu_env *env,
337 struct cl_object *o,
338 pgoff_t idx, struct page *vmpage,
339 enum cl_page_type type,
340 struct cl_page *parent)
341{
342 struct cl_page *page = NULL;
343 struct cl_page *ghost = NULL;
344 struct cl_object_header *hdr;
345 int err;
346
347 LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
348 might_sleep();
349
350 hdr = cl_object_header(o);
351
352 CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
353 idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
354 /* fast path. */
355 if (type == CPT_CACHEABLE) {
356 /*
357 * vmpage lock is used to protect the child/parent
358 * relationship
359 */
360 KLASSERT(PageLocked(vmpage));
361 /*
362 * cl_vmpage_page() can be called here without any locks as
363 *
364 * - "vmpage" is locked (which prevents ->private from
365 * concurrent updates), and
366 *
367 * - "o" cannot be destroyed while current thread holds a
368 * reference on it.
369 */
370 page = cl_vmpage_page(vmpage, o);
371 PINVRNT(env, page,
372 ergo(page,
373 cl_page_vmpage(env, page) == vmpage &&
374 (void *)radix_tree_lookup(&hdr->coh_tree,
375 idx) == page));
376 }
377
378 if (page)
379 return page;
380
381 /* allocate and initialize cl_page */
382 page = cl_page_alloc(env, o, idx, vmpage, type);
383 if (IS_ERR(page))
384 return page;
385
386 if (type == CPT_TRANSIENT) {
387 if (parent) {
388 LASSERT(!page->cp_parent);
389 page->cp_parent = parent;
390 parent->cp_child = page;
391 }
392 return page;
393 }
394
395 /*
396 * XXX optimization: use radix_tree_preload() here, and change tree
397 * gfp mask to GFP_KERNEL in cl_object_header_init().
398 */
399 spin_lock(&hdr->coh_page_guard);
400 err = radix_tree_insert(&hdr->coh_tree, idx, page);
401 if (err != 0) {
402 ghost = page;
403 /*
404 * Noted by Jay: a lock on \a vmpage protects cl_page_find()
405 * from this race, but
406 *
407 * 0. it's better to have cl_page interface "locally
408 * consistent" so that its correctness can be reasoned
409 * about without appealing to the (obscure world of) VM
410 * locking.
411 *
412 * 1. handling this race allows ->coh_tree to remain
413 * consistent even when VM locking is somehow busted,
414 * which is very useful during diagnosing and debugging.
415 */
416 page = ERR_PTR(err);
417 CL_PAGE_DEBUG(D_ERROR, env, ghost,
418 "fail to insert into radix tree: %d\n", err);
419 } else {
420 if (parent) {
421 LASSERT(!page->cp_parent);
422 page->cp_parent = parent;
423 parent->cp_child = page;
424 }
425 hdr->coh_pages++;
426 }
427 spin_unlock(&hdr->coh_page_guard);
428
429 if (unlikely(ghost)) {
430 cl_page_delete0(env, ghost, 0);
431 cl_page_free(env, ghost);
432 }
433 return page;
434}
435
436struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
437 pgoff_t idx, struct page *vmpage,
438 enum cl_page_type type)
439{
440 return cl_page_find0(env, o, idx, vmpage, type, NULL);
441}
442EXPORT_SYMBOL(cl_page_find);
443
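/*
 * Usage sketch (illustrative addition, not part of the original driver):
 * finding (or allocating) the cl_page for a locked VM page and dropping the
 * extra reference when done. "obj", "index" and "vmpage" are assumed to come
 * from a hypothetical caller that holds the VM page lock.
 *
 *	struct cl_page *pg;
 *
 *	pg = cl_page_find(env, obj, index, vmpage, CPT_CACHEABLE);
 *	if (IS_ERR(pg))
 *		return PTR_ERR(pg);
 *	... use the page ...
 *	cl_page_put(env, pg);
 */
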
444struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
445 pgoff_t idx, struct page *vmpage,
446 struct cl_page *parent)
447{
448 return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
449}
450EXPORT_SYMBOL(cl_page_find_sub);
451
452static inline int cl_page_invariant(const struct cl_page *pg)
453{
454 struct cl_object_header *header;
455 struct cl_page *parent;
456 struct cl_page *child;
457 struct cl_io *owner;
458
459 /*
460 * Page invariant is protected by a VM lock.
461 */
462 LINVRNT(cl_page_is_vmlocked(NULL, pg));
463
464 header = cl_object_header(pg->cp_obj);
465 parent = pg->cp_parent;
466 child = pg->cp_child;
467 owner = pg->cp_owner;
468
469 return cl_page_in_use(pg) &&
470 ergo(parent, parent->cp_child == pg) &&
471 ergo(child, child->cp_parent == pg) &&
472 ergo(child, pg->cp_obj != child->cp_obj) &&
473 ergo(parent, pg->cp_obj != parent->cp_obj) &&
474 ergo(owner && parent,
475 parent->cp_owner == pg->cp_owner->ci_parent) &&
476 ergo(owner && child, child->cp_owner->ci_parent == owner) &&
477 /*
478 * Either page is early in initialization (has neither child
479 * nor parent yet), or it is in the object radix tree.
480 */
481 ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
482 (void *)radix_tree_lookup(&header->coh_tree,
483 pg->cp_index) == pg ||
484 (!child && !parent));
485}
486
487static void cl_page_state_set0(const struct lu_env *env,
488 struct cl_page *page, enum cl_page_state state)
489{
490 enum cl_page_state old;
491
492 /*
493 * Matrix of allowed state transitions [old][new], for sanity
494 * checking.
495 */
496 static const int allowed_transitions[CPS_NR][CPS_NR] = {
497 [CPS_CACHED] = {
498 [CPS_CACHED] = 0,
499 [CPS_OWNED] = 1, /* io finds existing cached page */
500 [CPS_PAGEIN] = 0,
501 [CPS_PAGEOUT] = 1, /* write-out from the cache */
502 [CPS_FREEING] = 1, /* eviction on the memory pressure */
503 },
504 [CPS_OWNED] = {
505 [CPS_CACHED] = 1, /* release to the cache */
506 [CPS_OWNED] = 0,
507 [CPS_PAGEIN] = 1, /* start read immediately */
508 [CPS_PAGEOUT] = 1, /* start write immediately */
509 [CPS_FREEING] = 1, /* lock invalidation or truncate */
510 },
511 [CPS_PAGEIN] = {
512 [CPS_CACHED] = 1, /* io completion */
513 [CPS_OWNED] = 0,
514 [CPS_PAGEIN] = 0,
515 [CPS_PAGEOUT] = 0,
516 [CPS_FREEING] = 0,
517 },
518 [CPS_PAGEOUT] = {
519 [CPS_CACHED] = 1, /* io completion */
520 [CPS_OWNED] = 0,
521 [CPS_PAGEIN] = 0,
522 [CPS_PAGEOUT] = 0,
523 [CPS_FREEING] = 0,
524 },
525 [CPS_FREEING] = {
526 [CPS_CACHED] = 0,
527 [CPS_OWNED] = 0,
528 [CPS_PAGEIN] = 0,
529 [CPS_PAGEOUT] = 0,
530 [CPS_FREEING] = 0,
531 }
532 };
533
534 old = page->cp_state;
535 PASSERT(env, page, allowed_transitions[old][state]);
536 CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
537 for (; page; page = page->cp_child) {
538 PASSERT(env, page, page->cp_state == old);
539 PASSERT(env, page,
540 equi(state == CPS_OWNED, page->cp_owner));
541
542 cl_page_state_set_trust(page, state);
543 }
544}
545
546static void cl_page_state_set(const struct lu_env *env,
547 struct cl_page *page, enum cl_page_state state)
548{
549 cl_page_state_set0(env, page, state);
550}
551
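/*
 * Summary added for reference (derived from the allowed_transitions matrix
 * above, informational only): the typical life cycle of a cacheable page is
 *
 *	CPS_CACHED  -> CPS_OWNED                 (io owns the page)
 *	CPS_OWNED   -> CPS_PAGEIN / CPS_PAGEOUT  (transfer starts)
 *	CPS_PAGEIN / CPS_PAGEOUT -> CPS_CACHED   (transfer completes)
 *	CPS_CACHED or CPS_OWNED  -> CPS_FREEING  (truncate, lock invalidation,
 *						  memory pressure)
 */
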
552/**
553 * Acquires an additional reference to a page.
554 *
555 * This can be called only by caller already possessing a reference to \a
556 * page.
557 *
558 * \see cl_object_get(), cl_lock_get().
559 */
560void cl_page_get(struct cl_page *page)
561{
562 cl_page_get_trust(page);
563}
564EXPORT_SYMBOL(cl_page_get);
565
566/**
567 * Releases a reference to a page.
568 *
569 * When last reference is released, page is returned to the cache, unless it
570 * is in cl_page_state::CPS_FREEING state, in which case it is immediately
571 * destroyed.
572 *
573 * \see cl_object_put(), cl_lock_put().
574 */
575void cl_page_put(const struct lu_env *env, struct cl_page *page)
576{
577 PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);
578
579 CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
580 atomic_read(&page->cp_ref));
581
582 if (atomic_dec_and_test(&page->cp_ref)) {
583 LASSERT(page->cp_state == CPS_FREEING);
584
585 LASSERT(atomic_read(&page->cp_ref) == 0);
586 PASSERT(env, page, !page->cp_owner);
587 PASSERT(env, page, list_empty(&page->cp_batch));
588 /*
589 * Page is no longer reachable by other threads. Tear
590 * it down.
591 */
592 cl_page_free(env, page);
593 }
594}
595EXPORT_SYMBOL(cl_page_put);
596
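/*
 * Usage sketch (illustrative addition, not part of the original driver):
 * pinning a page across a region where the caller's original reference may
 * go away; "pg" is already referenced by a hypothetical caller.
 *
 *	cl_page_get(pg);
 *	... hand pg to code that may outlive the original reference ...
 *	cl_page_put(env, pg);
 */
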
597/**
598 * Returns a VM page associated with a given cl_page.
599 */
600struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
601{
602 const struct cl_page_slice *slice;
603
604 /*
605 * Find uppermost layer with ->cpo_vmpage() method, and return its
606 * result.
607 */
608 page = cl_page_top(page);
609 do {
610 list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
611 if (slice->cpl_ops->cpo_vmpage)
612 return slice->cpl_ops->cpo_vmpage(env, slice);
613 }
614 page = page->cp_child;
615 } while (page);
616 LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
617}
618EXPORT_SYMBOL(cl_page_vmpage);
619
620/**
621 * Returns a cl_page associated with a VM page, and given cl_object.
622 */
623struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
624{
625 struct cl_page *top;
626 struct cl_page *page;
627
628 KLASSERT(PageLocked(vmpage));
629
630 /*
631 * NOTE: absence of races and liveness of data are guaranteed by page
632 * lock on a "vmpage". That works because object destruction has
633 * bottom-to-top pass.
634 */
635
636 /*
637 * This loop assumes that ->private points to the top-most page. This
638 * can be rectified easily.
639 */
640 top = (struct cl_page *)vmpage->private;
641 if (!top)
642 return NULL;
643
644 for (page = top; page; page = page->cp_child) {
645 if (cl_object_same(page->cp_obj, obj)) {
646 cl_page_get_trust(page);
647 break;
648 }
649 }
650 LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
651 return page;
652}
653EXPORT_SYMBOL(cl_vmpage_page);
654
655/**
656 * Returns the top-page for a given page.
657 *
658 * \see cl_object_top(), cl_io_top()
659 */
660struct cl_page *cl_page_top(struct cl_page *page)
661{
662 return cl_page_top_trusted(page);
663}
664EXPORT_SYMBOL(cl_page_top);
665
666const struct cl_page_slice *cl_page_at(const struct cl_page *page,
667 const struct lu_device_type *dtype)
668{
669 return cl_page_at_trusted(page, dtype);
670}
671EXPORT_SYMBOL(cl_page_at);
672
673#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
674
675#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...) \
676({ \
677 const struct lu_env *__env = (_env); \
678 struct cl_page *__page = (_page); \
679 const struct cl_page_slice *__scan; \
680 int __result; \
681 ptrdiff_t __op = (_op); \
682 int (*__method)_proto; \
683 \
684 __result = 0; \
685 __page = cl_page_top(__page); \
686 do { \
687 list_for_each_entry(__scan, &__page->cp_layers, \
688 cpl_linkage) { \
689 __method = *(void **)((char *)__scan->cpl_ops + \
690 __op); \
691 if (__method) { \
692 __result = (*__method)(__env, __scan, \
693 ## __VA_ARGS__); \
694 if (__result != 0) \
695 break; \
696 } \
697 } \
698 __page = __page->cp_child; \
699 } while (__page && __result == 0); \
700 if (__result > 0) \
701 __result = 0; \
702 __result; \
703})
704
705#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \
706do { \
707 const struct lu_env *__env = (_env); \
708 struct cl_page *__page = (_page); \
709 const struct cl_page_slice *__scan; \
710 ptrdiff_t __op = (_op); \
711 void (*__method)_proto; \
712 \
713 __page = cl_page_top(__page); \
714 do { \
715 list_for_each_entry(__scan, &__page->cp_layers, \
716 cpl_linkage) { \
717 __method = *(void **)((char *)__scan->cpl_ops + \
718 __op); \
719 if (__method) \
720 (*__method)(__env, __scan, \
721 ## __VA_ARGS__); \
722 } \
723 __page = __page->cp_child; \
724 } while (__page); \
725} while (0)
726
727#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \
728do { \
729 const struct lu_env *__env = (_env); \
730 struct cl_page *__page = (_page); \
731 const struct cl_page_slice *__scan; \
732 ptrdiff_t __op = (_op); \
733 void (*__method)_proto; \
734 \
735 /* get to the bottom page. */ \
736 while (__page->cp_child) \
737 __page = __page->cp_child; \
738 do { \
739 list_for_each_entry_reverse(__scan, &__page->cp_layers, \
740 cpl_linkage) { \
741 __method = *(void **)((char *)__scan->cpl_ops + \
742 __op); \
743 if (__method) \
744 (*__method)(__env, __scan, \
745 ## __VA_ARGS__); \
746 } \
747 __page = __page->cp_parent; \
748 } while (__page); \
749} while (0)
750
751static int cl_page_invoke(const struct lu_env *env,
752 struct cl_io *io, struct cl_page *page, ptrdiff_t op)
753
754{
755 PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
756 return CL_PAGE_INVOKE(env, page, op,
757 (const struct lu_env *,
758 const struct cl_page_slice *, struct cl_io *),
759 io);
760}
761
762static void cl_page_invoid(const struct lu_env *env,
763 struct cl_io *io, struct cl_page *page, ptrdiff_t op)
764
765{
766 PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
767 CL_PAGE_INVOID(env, page, op,
768 (const struct lu_env *,
769 const struct cl_page_slice *, struct cl_io *), io);
770}
771
772static void cl_page_owner_clear(struct cl_page *page)
773{
774 for (page = cl_page_top(page); page; page = page->cp_child) {
775 if (page->cp_owner) {
776 LASSERT(page->cp_owner->ci_owned_nr > 0);
777 page->cp_owner->ci_owned_nr--;
778 page->cp_owner = NULL;
779 page->cp_task = NULL;
780 }
781 }
782}
783
784static void cl_page_owner_set(struct cl_page *page)
785{
786 for (page = cl_page_top(page); page; page = page->cp_child)
787 page->cp_owner->ci_owned_nr++;
788}
789
790void cl_page_disown0(const struct lu_env *env,
791 struct cl_io *io, struct cl_page *pg)
792{
793 enum cl_page_state state;
794
795 state = pg->cp_state;
796 PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
797 PINVRNT(env, pg, cl_page_invariant(pg));
798 cl_page_owner_clear(pg);
799
800 if (state == CPS_OWNED)
801 cl_page_state_set(env, pg, CPS_CACHED);
802 /*
803 * Completion call-backs are executed in the bottom-up order, so that
804 * uppermost layer (llite), responsible for VFS/VM interaction, runs
805 * last and can release locks safely.
806 */
807 CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
808 (const struct lu_env *,
809 const struct cl_page_slice *, struct cl_io *),
810 io);
811}
812
813/**
814 * returns true, iff page is owned by the given io.
815 */
816int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
817{
818 LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
819 return pg->cp_state == CPS_OWNED && pg->cp_owner == io;
820}
821EXPORT_SYMBOL(cl_page_is_owned);
822
823/**
824 * Try to own a page by IO.
825 *
826 * Waits until page is in cl_page_state::CPS_CACHED state, and then switch it
827 * into cl_page_state::CPS_OWNED state.
828 *
829 * \pre !cl_page_is_owned(pg, io)
830 * \post result == 0 iff cl_page_is_owned(pg, io)
831 *
832 * \retval 0 success
833 *
834 * \retval -ve failure, e.g., page was destroyed (and landed in
835 * cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
836 * the page was owned by another thread, or it was under IO.
837 *
838 * \see cl_page_disown()
839 * \see cl_page_operations::cpo_own()
840 * \see cl_page_own_try()
841 * \see cl_page_own
842 */
843static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
844 struct cl_page *pg, int nonblock)
845{
846 int result;
847
848 PINVRNT(env, pg, !cl_page_is_owned(pg, io));
849
850 pg = cl_page_top(pg);
851 io = cl_io_top(io);
852
853 if (pg->cp_state == CPS_FREEING) {
854 result = -ENOENT;
855 } else {
856 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
857 (const struct lu_env *,
858 const struct cl_page_slice *,
859 struct cl_io *, int),
860 io, nonblock);
861 if (result == 0) {
862 PASSERT(env, pg, !pg->cp_owner);
863 PASSERT(env, pg, !pg->cp_req);
864 pg->cp_owner = io;
865 pg->cp_task = current;
866 cl_page_owner_set(pg);
867 if (pg->cp_state != CPS_FREEING) {
868 cl_page_state_set(env, pg, CPS_OWNED);
869 } else {
870 cl_page_disown0(env, io, pg);
871 result = -ENOENT;
872 }
873 }
874 }
875 PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
876 return result;
877}
878
879/**
880 * Own a page, might be blocked.
881 *
882 * \see cl_page_own0()
883 */
884int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
885{
886 return cl_page_own0(env, io, pg, 0);
887}
888EXPORT_SYMBOL(cl_page_own);
889
890/**
891 * Nonblock version of cl_page_own().
892 *
893 * \see cl_page_own0()
894 */
895int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
896 struct cl_page *pg)
897{
898 return cl_page_own0(env, io, pg, 1);
899}
900EXPORT_SYMBOL(cl_page_own_try);
901
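/*
 * Usage sketch (illustrative addition, not part of the original driver):
 * owning a page for the duration of an operation, with a non-blocking
 * attempt first. "pg" and "io" come from a hypothetical caller.
 *
 *	if (cl_page_own_try(env, io, pg) == 0) {
 *		... the page is now in CPS_OWNED, operate on it ...
 *		cl_page_disown(env, io, pg);
 *	} else {
 *		... the page is busy or being freed; skip or retry ...
 *	}
 */
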
902/**
903 * Assume page ownership.
904 *
905 * Called when page is already locked by the hosting VM.
906 *
907 * \pre !cl_page_is_owned(pg, io)
908 * \post cl_page_is_owned(pg, io)
909 *
910 * \see cl_page_operations::cpo_assume()
911 */
912void cl_page_assume(const struct lu_env *env,
913 struct cl_io *io, struct cl_page *pg)
914{
915 PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
916
917 pg = cl_page_top(pg);
918 io = cl_io_top(io);
919
920 cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
921 PASSERT(env, pg, !pg->cp_owner);
922 pg->cp_owner = io;
923 pg->cp_task = current;
924 cl_page_owner_set(pg);
925 cl_page_state_set(env, pg, CPS_OWNED);
926}
927EXPORT_SYMBOL(cl_page_assume);
928
929/**
930 * Releases page ownership without unlocking the page.
931 *
932 * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
933 * underlying VM page (as VM is supposed to do this itself).
934 *
935 * \pre cl_page_is_owned(pg, io)
936 * \post !cl_page_is_owned(pg, io)
937 *
938 * \see cl_page_assume()
939 */
940void cl_page_unassume(const struct lu_env *env,
941 struct cl_io *io, struct cl_page *pg)
942{
943 PINVRNT(env, pg, cl_page_is_owned(pg, io));
944 PINVRNT(env, pg, cl_page_invariant(pg));
945
946 pg = cl_page_top(pg);
947 io = cl_io_top(io);
948 cl_page_owner_clear(pg);
949 cl_page_state_set(env, pg, CPS_CACHED);
950 CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
951 (const struct lu_env *,
952 const struct cl_page_slice *, struct cl_io *),
953 io);
954}
955EXPORT_SYMBOL(cl_page_unassume);
956
957/**
958 * Releases page ownership.
959 *
960 * Moves page into cl_page_state::CPS_CACHED.
961 *
962 * \pre cl_page_is_owned(pg, io)
963 * \post !cl_page_is_owned(pg, io)
964 *
965 * \see cl_page_own()
966 * \see cl_page_operations::cpo_disown()
967 */
968void cl_page_disown(const struct lu_env *env,
969 struct cl_io *io, struct cl_page *pg)
970{
971 PINVRNT(env, pg, cl_page_is_owned(pg, io));
972
973 pg = cl_page_top(pg);
974 io = cl_io_top(io);
975 cl_page_disown0(env, io, pg);
976}
977EXPORT_SYMBOL(cl_page_disown);
978
979/**
980 * Called when page is to be removed from the object, e.g., as a result of
981 * truncate.
982 *
983 * Calls cl_page_operations::cpo_discard() top-to-bottom.
984 *
985 * \pre cl_page_is_owned(pg, io)
986 *
987 * \see cl_page_operations::cpo_discard()
988 */
989void cl_page_discard(const struct lu_env *env,
990 struct cl_io *io, struct cl_page *pg)
991{
992 PINVRNT(env, pg, cl_page_is_owned(pg, io));
993 PINVRNT(env, pg, cl_page_invariant(pg));
994
995 cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
996}
997EXPORT_SYMBOL(cl_page_discard);
998
999/**
1000 * Version of cl_page_delete() that can be called for not fully constructed
1001 * pages, e.g., in an error handling cl_page_find()->cl_page_delete0()
1002 * path. Doesn't check page invariant.
1003 */
1004static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
1005 int radix)
1006{
1007 struct cl_page *tmp = pg;
1008
1009 PASSERT(env, pg, pg == cl_page_top(pg));
1010 PASSERT(env, pg, pg->cp_state != CPS_FREEING);
1011
1012 /*
1013 * Sever all ways to obtain new pointers to @pg.
1014 */
1015 cl_page_owner_clear(pg);
1016
1017 /*
1018 * unexport the page first, before freeing it, so that
1019 * the page content is considered to be invalid.
1020 * We have to do this because a CPS_FREEING cl_page may
1021 * NOT be under the protection of a cl_lock.
1022 * Afterwards, if this page is found by other threads, it
1023 * will be forced to be re-read.
1024 */
1025 cl_page_export(env, pg, 0);
1026 cl_page_state_set0(env, pg, CPS_FREEING);
1027
1028 CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
1029 (const struct lu_env *, const struct cl_page_slice *));
1030
1031 if (tmp->cp_type == CPT_CACHEABLE) {
1032 if (!radix)
1033 /* !radix means that @pg is not yet in the radix tree,
1034 * skip removing it.
1035 */
1036 tmp = pg->cp_child;
1037 for (; tmp; tmp = tmp->cp_child) {
1038 void *value;
1039 struct cl_object_header *hdr;
1040
1041 hdr = cl_object_header(tmp->cp_obj);
1042 spin_lock(&hdr->coh_page_guard);
1043 value = radix_tree_delete(&hdr->coh_tree,
1044 tmp->cp_index);
1045 PASSERT(env, tmp, value == tmp);
1046 PASSERT(env, tmp, hdr->coh_pages > 0);
1047 hdr->coh_pages--;
1048 spin_unlock(&hdr->coh_page_guard);
1049 cl_page_put(env, tmp);
1050 }
1051 }
1052}
1053
1054/**
1055 * Called when a decision is made to throw page out of memory.
1056 *
1057 * Notifies all layers about page destruction by calling
1058 * cl_page_operations::cpo_delete() method top-to-bottom.
1059 *
1060 * Moves page into cl_page_state::CPS_FREEING state (this is the only place
1061 * where transition to this state happens).
1062 *
1063 * Eliminates all venues through which new references to the page can be
1064 * obtained:
1065 *
1066 * - removes page from the radix trees,
1067 *
1068 * - breaks linkage from VM page to cl_page.
1069 *
1070 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
1071 * drain after some time, at which point page will be recycled.
1072 *
1073 * \pre pg == cl_page_top(pg)
1074 * \pre VM page is locked
1075 * \post pg->cp_state == CPS_FREEING
1076 *
1077 * \see cl_page_operations::cpo_delete()
1078 */
1079void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
1080{
1081 PINVRNT(env, pg, cl_page_invariant(pg));
1082 cl_page_delete0(env, pg, 1);
1083}
1084EXPORT_SYMBOL(cl_page_delete);
1085
1086/**
1087 * Unmaps page from user virtual memory.
1088 *
1089 * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
1090 * layer responsible for VM interaction has to unmap page from user space
1091 * virtual memory.
1092 *
1093 * \see cl_page_operations::cpo_unmap()
1094 */
1095int cl_page_unmap(const struct lu_env *env,
1096 struct cl_io *io, struct cl_page *pg)
1097{
1098 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1099 PINVRNT(env, pg, cl_page_invariant(pg));
1100
1101 return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
1102}
1103EXPORT_SYMBOL(cl_page_unmap);
1104
1105/**
1106 * Marks page up-to-date.
1107 *
1108 * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
1109 * layer responsible for VM interaction has to mark/clear page as up-to-date
1110 * by the \a uptodate argument.
1111 *
1112 * \see cl_page_operations::cpo_export()
1113 */
1114void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
1115{
1116 PINVRNT(env, pg, cl_page_invariant(pg));
1117 CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
1118 (const struct lu_env *,
1119 const struct cl_page_slice *, int), uptodate);
1120}
1121EXPORT_SYMBOL(cl_page_export);
1122
1123/**
1124 * Returns true, iff \a pg is VM locked in a suitable sense by the calling
1125 * thread.
1126 */
1127int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
1128{
1129 int result;
1130 const struct cl_page_slice *slice;
1131
1132 pg = cl_page_top_trusted((struct cl_page *)pg);
1133 slice = container_of(pg->cp_layers.next,
1134 const struct cl_page_slice, cpl_linkage);
1135 PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
1136 /*
1137 * Call ->cpo_is_vmlocked() directly instead of going through
1138 * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
1139 * cl_page_invariant().
1140 */
1141 result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
1142 PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
1143 return result == -EBUSY;
1144}
1145EXPORT_SYMBOL(cl_page_is_vmlocked);
1146
1147static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
1148{
1149 return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
1150}
1151
1152static void cl_page_io_start(const struct lu_env *env,
1153 struct cl_page *pg, enum cl_req_type crt)
1154{
1155 /*
1156 * Page is queued for IO, change its state.
1157 */
1158 cl_page_owner_clear(pg);
1159 cl_page_state_set(env, pg, cl_req_type_state(crt));
1160}
1161
1162/**
1163 * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
1164 * called top-to-bottom. Every layer either agrees to submit this page (by
1165 * returning 0), or requests to omit this page (by returning -EALREADY). The
1166 * layer handling interactions with the VM also has to inform the VM that the
1167 * page is now under transfer.
1168 */
1169int cl_page_prep(const struct lu_env *env, struct cl_io *io,
1170 struct cl_page *pg, enum cl_req_type crt)
1171{
1172 int result;
1173
1174 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1175 PINVRNT(env, pg, cl_page_invariant(pg));
1176 PINVRNT(env, pg, crt < CRT_NR);
1177
1178 /*
1179 * XXX this has to be called bottom-to-top, so that llite can set up
1180 * PG_writeback without risking other layers deciding to skip this
1181 * page.
1182 */
1183 if (crt >= CRT_NR)
1184 return -EINVAL;
1185 result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
1186 if (result == 0)
1187 cl_page_io_start(env, pg, crt);
1188
1189 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
1190 return result;
1191}
1192EXPORT_SYMBOL(cl_page_prep);
1193
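/*
 * Transfer-submission sketch (illustrative addition, not part of the
 * original driver): an owned page is prepared for immediate write-out. On
 * success the page moves to CPS_PAGEOUT (cl_page_io_start() also clears the
 * owner), and the transfer engine later reports the result through
 * cl_page_completion(). "pg", "io" and "rc" are hypothetical.
 *
 *	rc = cl_page_prep(env, io, pg, CRT_WRITE);
 *	if (rc == 0)
 *		... the page is queued; cl_page_completion(env, pg,
 *		    CRT_WRITE, ioret) will be called when the transfer ends ...
 *	else if (rc == -EALREADY)
 *		... some layer asked to omit this page ...
 */
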
1194/**
1195 * Notify layers about transfer completion.
1196 *
1197 * Invoked by transfer sub-system (which is a part of osc) to notify layers
1198 * that a transfer, of which this page is a part, has completed.
1199 *
1200 * Completion call-backs are executed in the bottom-up order, so that
1201 * uppermost layer (llite), responsible for the VFS/VM interaction, runs last
1202 * and can release locks safely.
1203 *
1204 * \pre pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
1205 * \post pg->cp_state == CPS_CACHED
1206 *
1207 * \see cl_page_operations::cpo_completion()
1208 */
1209void cl_page_completion(const struct lu_env *env,
1210 struct cl_page *pg, enum cl_req_type crt, int ioret)
1211{
1212 struct cl_sync_io *anchor = pg->cp_sync_io;
1213
1214 PASSERT(env, pg, crt < CRT_NR);
1215 /* cl_page::cp_req already cleared by the caller (osc_completion()) */
1216 PASSERT(env, pg, !pg->cp_req);
1217 PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
1218
1219 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
1220 if (crt == CRT_READ && ioret == 0) {
1221 PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
1222 pg->cp_flags |= CPF_READ_COMPLETED;
1223 }
1224
1225 cl_page_state_set(env, pg, CPS_CACHED);
1226 if (crt >= CRT_NR)
1227 return;
1228 CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
1229 (const struct lu_env *,
1230 const struct cl_page_slice *, int), ioret);
1231 if (anchor) {
1232 LASSERT(cl_page_is_vmlocked(env, pg));
1233 LASSERT(pg->cp_sync_io == anchor);
1234 pg->cp_sync_io = NULL;
1235 }
1236 /*
1237 * As page->cp_obj is pinned by a reference from page->cp_req, it is
1238 * safe to call cl_page_put() without risking object destruction in a
1239 * non-blocking context.
1240 */
1241 cl_page_put(env, pg);
1242
1243 if (anchor)
1244 cl_sync_io_note(anchor, ioret);
1245}
1246EXPORT_SYMBOL(cl_page_completion);
1247
1248/**
1249 * Notify layers that transfer formation engine decided to yank this page from
1250 * the cache and to make it a part of a transfer.
1251 *
1252 * \pre pg->cp_state == CPS_CACHED
1253 * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
1254 *
1255 * \see cl_page_operations::cpo_make_ready()
1256 */
1257int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
1258 enum cl_req_type crt)
1259{
1260 int result;
1261
1262 PINVRNT(env, pg, crt < CRT_NR);
1263
1264 if (crt >= CRT_NR)
1265 return -EINVAL;
1266 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
1267 (const struct lu_env *,
1268 const struct cl_page_slice *));
1269 if (result == 0) {
1270 PASSERT(env, pg, pg->cp_state == CPS_CACHED);
1271 cl_page_io_start(env, pg, crt);
1272 }
1273 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
1274 return result;
1275}
1276EXPORT_SYMBOL(cl_page_make_ready);
1277
1278/**
1279 * Notify layers that high level io decided to place this page into a cache
1280 * for future transfer.
1281 *
1282 * The layer implementing transfer engine (osc) has to register this page in
1283 * its queues.
1284 *
1285 * \pre cl_page_is_owned(pg, io)
1286 * \post cl_page_is_owned(pg, io)
1287 *
1288 * \see cl_page_operations::cpo_cache_add()
1289 */
1290int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
1291 struct cl_page *pg, enum cl_req_type crt)
1292{
1293 const struct cl_page_slice *scan;
1294 int result = 0;
1295
1296 PINVRNT(env, pg, crt < CRT_NR);
1297 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1298 PINVRNT(env, pg, cl_page_invariant(pg));
1299
1300 if (crt >= CRT_NR)
1301 return -EINVAL;
1302
1303 list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
1304 if (!scan->cpl_ops->io[crt].cpo_cache_add)
1305 continue;
1306
1307 result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
1308 if (result != 0)
1309 break;
1310 }
1311 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
1312 return result;
1313}
1314EXPORT_SYMBOL(cl_page_cache_add);
1315
1316/**
1317 * Called when a page is being written back at the kernel's initiative.
1318 *
1319 * \pre cl_page_is_owned(pg, io)
1320 * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
1321 *
1322 * \see cl_page_operations::cpo_flush()
1323 */
1324int cl_page_flush(const struct lu_env *env, struct cl_io *io,
1325 struct cl_page *pg)
1326{
1327 int result;
1328
1329 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1330 PINVRNT(env, pg, cl_page_invariant(pg));
1331
1332 result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
1333
1334 CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
1335 return result;
1336}
1337EXPORT_SYMBOL(cl_page_flush);
1338
1339/**
1340 * Checks whether the page is protected by any extent lock in at least the
1341 * required mode.
1342 *
1343 * \return the same as in cl_page_operations::cpo_is_under_lock() method.
1344 * \see cl_page_operations::cpo_is_under_lock()
1345 */
1346int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
1347 struct cl_page *page)
1348{
1349 int rc;
1350
1351 PINVRNT(env, page, cl_page_invariant(page));
1352
1353 rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
1354 (const struct lu_env *,
1355 const struct cl_page_slice *, struct cl_io *),
1356 io);
1357 PASSERT(env, page, rc != 0);
1358 return rc;
1359}
1360EXPORT_SYMBOL(cl_page_is_under_lock);
1361
1362static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
1363 struct cl_page *page, void *cbdata)
1364{
1365 cl_page_own(env, io, page);
1366 cl_page_unmap(env, io, page);
1367 cl_page_discard(env, io, page);
1368 cl_page_disown(env, io, page);
1369 return CLP_GANG_OKAY;
1370}
1371
1372/**
1373 * Purges all cached pages belonging to the object \a obj.
1374 */
1375int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
1376{
1377 struct cl_thread_info *info;
1378 struct cl_object *obj = cl_object_top(clobj);
1379 struct cl_io *io;
1380 int result;
1381
1382 info = cl_env_info(env);
1383 io = &info->clt_io;
1384
1385 /*
1386 * initialize the io. This is ugly since we never do IO in this
1387 * function, we just make cl_page_list functions happy. -jay
1388 */
1389 io->ci_obj = obj;
1390 io->ci_ignore_layout = 1;
1391 result = cl_io_init(env, io, CIT_MISC, obj);
1392 if (result != 0) {
1393 cl_io_fini(env, io);
1394 return io->ci_result;
1395 }
1396
1397 do {
1398 result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
1399 page_prune_cb, NULL);
1400 if (result == CLP_GANG_RESCHED)
1401 cond_resched();
1402 } while (result != CLP_GANG_OKAY);
1403
1404 cl_io_fini(env, io);
1405 return result;
1406}
1407EXPORT_SYMBOL(cl_pages_prune);
1408
1409/**
1410 * Tells transfer engine that only part of a page is to be transmitted.
1411 *
1412 * \see cl_page_operations::cpo_clip()
1413 */
1414void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
1415 int from, int to)
1416{
1417 PINVRNT(env, pg, cl_page_invariant(pg));
1418
1419 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
1420 CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
1421 (const struct lu_env *,
1422 const struct cl_page_slice *, int, int),
1423 from, to);
1424}
1425EXPORT_SYMBOL(cl_page_clip);
1426
1427/**
1428 * Prints human readable representation of \a pg to the \a f.
1429 */
1430void cl_page_header_print(const struct lu_env *env, void *cookie,
1431 lu_printer_t printer, const struct cl_page *pg)
1432{
1433 (*printer)(env, cookie,
1434 "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
1435 pg, atomic_read(&pg->cp_ref), pg->cp_obj,
1436 pg->cp_index, pg->cp_parent, pg->cp_child,
1437 pg->cp_state, pg->cp_error, pg->cp_type,
1438 pg->cp_owner, pg->cp_req, pg->cp_flags);
1439}
1440EXPORT_SYMBOL(cl_page_header_print);
1441
1442/**
1443 * Prints human readable representation of \a pg to the \a f.
1444 */
1445void cl_page_print(const struct lu_env *env, void *cookie,
1446 lu_printer_t printer, const struct cl_page *pg)
1447{
1448 struct cl_page *scan;
1449
1450 for (scan = cl_page_top((struct cl_page *)pg); scan;
1451 scan = scan->cp_child)
1452 cl_page_header_print(env, cookie, printer, scan);
1453 CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
1454 (const struct lu_env *env,
1455 const struct cl_page_slice *slice,
1456 void *cookie, lu_printer_t p), cookie, printer);
1457 (*printer)(env, cookie, "end page@%p\n", pg);
1458}
1459EXPORT_SYMBOL(cl_page_print);
1460
1461/**
1462 * Cancel a page which is still in a transfer.
1463 */
1464int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
1465{
1466 return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
1467 (const struct lu_env *,
1468 const struct cl_page_slice *));
1469}
1470EXPORT_SYMBOL(cl_page_cancel);
1471
1472/**
1473 * Converts a byte offset within object \a obj into a page index.
1474 */
1475loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
1476{
1477 /*
1478 * XXX for now.
1479 */
1480 return (loff_t)idx << PAGE_SHIFT;
1481}
1482EXPORT_SYMBOL(cl_offset);
1483
1484/**
1485 * Converts a page index into a byte offset within object \a obj.
1486 */
1487pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
1488{
1489 /*
1490 * XXX for now.
1491 */
1492 return offset >> PAGE_SHIFT;
1493}
1494EXPORT_SYMBOL(cl_index);
1495
1496int cl_page_size(const struct cl_object *obj)
1497{
1498 return 1 << PAGE_SHIFT;
1499}
1500EXPORT_SYMBOL(cl_page_size);
1501
1502/**
1503 * Adds page slice to the compound page.
1504 *
1505 * This is called by cl_object_operations::coo_page_init() methods to add a
1506 * per-layer state to the page. New state is added at the end of
1507 * cl_page::cp_layers list, that is, it is at the bottom of the stack.
1508 *
1509 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
1510 */
1511void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
1512 struct cl_object *obj,
1513 const struct cl_page_operations *ops)
1514{
1515 list_add_tail(&slice->cpl_linkage, &page->cp_layers);
1516 slice->cpl_obj = obj;
1517 slice->cpl_ops = ops;
1518 slice->cpl_page = page;
1519}
1520EXPORT_SYMBOL(cl_page_slice_add);
1521
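/*
 * Layer sketch (illustrative addition, not part of the original driver): a
 * hypothetical layer's cl_object_operations::coo_page_init() method sets up
 * its per-layer state and links it into the page with cl_page_slice_add().
 * "my_page_slice", "my_page_ops" and "my_page_init" are made-up names; the
 * slice is assumed to live in storage owned by that layer.
 *
 *	static int my_page_init(const struct lu_env *env, struct cl_object *obj,
 *				struct cl_page *page, struct page *vmpage)
 *	{
 *		struct my_page_slice *mps = ... obtain the slice ...;
 *
 *		cl_page_slice_add(page, &mps->mps_cl, obj, &my_page_ops);
 *		return 0;
 *	}
 */
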
1522int cl_page_init(void)
1523{
1524 return 0;
1525}
1526
1527void cl_page_fini(void)
1528{
1529}