4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * Implementation of cl_page for OSC layer.
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
41 #define DEBUG_SUBSYSTEM S_OSC
43 #include "osc_cl_internal.h"
/*
 * Forward declarations of the LRU helpers that are defined further down in
 * this file but are called by the page operations above their definitions.
 */
static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del);
static void osc_lru_add(struct client_obd *cli, struct osc_page *opg);
static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
			   struct osc_page *opg);
55 * Comment out osc_page_protected because it may sleep inside the
56 * client_obd_list_lock.
57 * client_obd_list_lock -> osc_ap_completion -> osc_completion ->
58 * -> osc_page_protected -> osc_page_is_dlocked -> osc_match_base
59 * -> ldlm_lock_match -> sptlrpc_import_check_ctx -> sleep.
62 static int osc_page_is_dlocked(const struct lu_env
*env
,
63 const struct osc_page
*opg
,
64 enum cl_lock_mode mode
, int pending
, int unref
)
67 struct osc_object
*obj
;
68 struct osc_thread_info
*info
;
69 struct ldlm_res_id
*resname
;
70 struct lustre_handle
*lockh
;
71 ldlm_policy_data_t
*policy
;
77 info
= osc_env_info(env
);
78 resname
= &info
->oti_resname
;
79 policy
= &info
->oti_policy
;
80 lockh
= &info
->oti_handle
;
81 page
= opg
->ops_cl
.cpl_page
;
82 obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
84 flags
= LDLM_FL_TEST_LOCK
| LDLM_FL_BLOCK_GRANTED
;
86 flags
|= LDLM_FL_CBPENDING
;
88 dlmmode
= osc_cl_lock2ldlm(mode
) | LCK_PW
;
89 osc_lock_build_res(env
, obj
, resname
);
90 osc_index2policy(policy
, page
->cp_obj
, page
->cp_index
, page
->cp_index
);
91 return osc_match_base(osc_export(obj
), resname
, LDLM_EXTENT
, policy
,
92 dlmmode
, &flags
, NULL
, lockh
, unref
);
96 * Checks an invariant that a page in the cache is covered by a lock, as
99 static int osc_page_protected(const struct lu_env
*env
,
100 const struct osc_page
*opg
,
101 enum cl_lock_mode mode
, int unref
)
103 struct cl_object_header
*hdr
;
104 struct cl_lock
*scan
;
105 struct cl_page
*page
;
106 struct cl_lock_descr
*descr
;
109 LINVRNT(!opg
->ops_temp
);
111 page
= opg
->ops_cl
.cpl_page
;
112 if (page
->cp_owner
!= NULL
&&
113 cl_io_top(page
->cp_owner
)->ci_lockreq
== CILR_NEVER
)
115 * If IO is done without locks (liblustre, or lloop), lock is
120 /* otherwise check for a DLM lock */
121 result
= osc_page_is_dlocked(env
, opg
, mode
, 1, unref
);
123 /* maybe this page is a part of a lockless io? */
124 hdr
= cl_object_header(opg
->ops_cl
.cpl_obj
);
125 descr
= &osc_env_info(env
)->oti_descr
;
126 descr
->cld_mode
= mode
;
127 descr
->cld_start
= page
->cp_index
;
128 descr
->cld_end
= page
->cp_index
;
129 spin_lock(&hdr
->coh_lock_guard
);
130 list_for_each_entry(scan
, &hdr
->coh_locks
, cll_linkage
) {
132 * Lock-less sub-lock has to be either in HELD state
133 * (when io is actively going on), or in CACHED state,
134 * when top-lock is being unlocked:
135 * cl_io_unlock()->cl_unuse()->...->lov_lock_unuse().
137 if ((scan
->cll_state
== CLS_HELD
||
138 scan
->cll_state
== CLS_CACHED
) &&
139 cl_lock_ext_match(&scan
->cll_descr
, descr
)) {
140 struct osc_lock
*olck
;
142 olck
= osc_lock_at(scan
);
143 result
= osc_lock_is_lockless(olck
);
147 spin_unlock(&hdr
->coh_lock_guard
);
152 static int osc_page_protected(const struct lu_env
*env
,
153 const struct osc_page
*opg
,
154 enum cl_lock_mode mode
, int unref
)
160 /*****************************************************************************
165 static void osc_page_fini(const struct lu_env
*env
,
166 struct cl_page_slice
*slice
)
168 struct osc_page
*opg
= cl2osc_page(slice
);
169 CDEBUG(D_TRACE
, "%p\n", opg
);
170 LASSERT(opg
->ops_lock
== NULL
);
173 static void osc_page_transfer_get(struct osc_page
*opg
, const char *label
)
175 struct cl_page
*page
= cl_page_top(opg
->ops_cl
.cpl_page
);
177 LASSERT(!opg
->ops_transfer_pinned
);
179 lu_ref_add_atomic(&page
->cp_reference
, label
, page
);
180 opg
->ops_transfer_pinned
= 1;
183 static void osc_page_transfer_put(const struct lu_env
*env
,
184 struct osc_page
*opg
)
186 struct cl_page
*page
= cl_page_top(opg
->ops_cl
.cpl_page
);
188 if (opg
->ops_transfer_pinned
) {
189 lu_ref_del(&page
->cp_reference
, "transfer", page
);
190 opg
->ops_transfer_pinned
= 0;
191 cl_page_put(env
, page
);
196 * This is called once for every page when it is submitted for a transfer
197 * either opportunistic (osc_page_cache_add()), or immediate
198 * (osc_page_submit()).
200 static void osc_page_transfer_add(const struct lu_env
*env
,
201 struct osc_page
*opg
, enum cl_req_type crt
)
203 struct osc_object
*obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
205 /* ops_lru and ops_inflight share the same field, so take it from LRU
206 * first and then use it as inflight. */
207 osc_lru_del(osc_cli(obj
), opg
, false);
209 spin_lock(&obj
->oo_seatbelt
);
210 list_add(&opg
->ops_inflight
, &obj
->oo_inflight
[crt
]);
211 opg
->ops_submitter
= current
;
212 spin_unlock(&obj
->oo_seatbelt
);
215 static int osc_page_cache_add(const struct lu_env
*env
,
216 const struct cl_page_slice
*slice
,
219 struct osc_io
*oio
= osc_env_io(env
);
220 struct osc_page
*opg
= cl2osc_page(slice
);
223 LINVRNT(osc_page_protected(env
, opg
, CLM_WRITE
, 0));
225 osc_page_transfer_get(opg
, "transfer\0cache");
226 result
= osc_queue_async_io(env
, io
, opg
);
228 osc_page_transfer_put(env
, opg
);
230 osc_page_transfer_add(env
, opg
, CRT_WRITE
);
232 /* for sync write, kernel will wait for this page to be flushed before
233 * osc_io_end() is called, so release it earlier.
234 * for mkwrite(), it's known there is no further pages. */
235 if (cl_io_is_sync_write(io
) || cl_io_is_mkwrite(io
)) {
236 if (oio
->oi_active
!= NULL
) {
237 osc_extent_release(env
, oio
->oi_active
);
238 oio
->oi_active
= NULL
;
245 void osc_index2policy(ldlm_policy_data_t
*policy
, const struct cl_object
*obj
,
246 pgoff_t start
, pgoff_t end
)
248 memset(policy
, 0, sizeof(*policy
));
249 policy
->l_extent
.start
= cl_offset(obj
, start
);
250 policy
->l_extent
.end
= cl_offset(obj
, end
+ 1) - 1;
253 static int osc_page_addref_lock(const struct lu_env
*env
,
254 struct osc_page
*opg
,
255 struct cl_lock
*lock
)
257 struct osc_lock
*olock
;
260 LASSERT(opg
->ops_lock
== NULL
);
262 olock
= osc_lock_at(lock
);
263 if (atomic_inc_return(&olock
->ols_pageref
) <= 0) {
264 atomic_dec(&olock
->ols_pageref
);
268 opg
->ops_lock
= lock
;
274 static void osc_page_putref_lock(const struct lu_env
*env
,
275 struct osc_page
*opg
)
277 struct cl_lock
*lock
= opg
->ops_lock
;
278 struct osc_lock
*olock
;
280 LASSERT(lock
!= NULL
);
281 olock
= osc_lock_at(lock
);
283 atomic_dec(&olock
->ols_pageref
);
284 opg
->ops_lock
= NULL
;
286 cl_lock_put(env
, lock
);
289 static int osc_page_is_under_lock(const struct lu_env
*env
,
290 const struct cl_page_slice
*slice
,
291 struct cl_io
*unused
)
293 struct cl_lock
*lock
;
294 int result
= -ENODATA
;
296 lock
= cl_lock_at_page(env
, slice
->cpl_obj
, slice
->cpl_page
,
299 if (osc_page_addref_lock(env
, cl2osc_page(slice
), lock
) == 0)
301 cl_lock_put(env
, lock
);
306 static void osc_page_disown(const struct lu_env
*env
,
307 const struct cl_page_slice
*slice
,
310 struct osc_page
*opg
= cl2osc_page(slice
);
312 if (unlikely(opg
->ops_lock
))
313 osc_page_putref_lock(env
, opg
);
316 static void osc_page_completion_read(const struct lu_env
*env
,
317 const struct cl_page_slice
*slice
,
320 struct osc_page
*opg
= cl2osc_page(slice
);
321 struct osc_object
*obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
323 if (likely(opg
->ops_lock
))
324 osc_page_putref_lock(env
, opg
);
325 osc_lru_add(osc_cli(obj
), opg
);
328 static void osc_page_completion_write(const struct lu_env
*env
,
329 const struct cl_page_slice
*slice
,
332 struct osc_page
*opg
= cl2osc_page(slice
);
333 struct osc_object
*obj
= cl2osc(slice
->cpl_obj
);
335 osc_lru_add(osc_cli(obj
), opg
);
338 static int osc_page_fail(const struct lu_env
*env
,
339 const struct cl_page_slice
*slice
,
340 struct cl_io
*unused
)
/**
 * Returns a one-character marker for a list: "-" when \a head is empty,
 * "+" otherwise. Used when printing page state.
 */
static const char *osc_list(struct list_head *head)
{
	return list_empty(head) ? "-" : "+";
}
355 static inline cfs_time_t
osc_submit_duration(struct osc_page
*opg
)
357 if (opg
->ops_submit_time
== 0)
360 return (cfs_time_current() - opg
->ops_submit_time
);
363 static int osc_page_print(const struct lu_env
*env
,
364 const struct cl_page_slice
*slice
,
365 void *cookie
, lu_printer_t printer
)
367 struct osc_page
*opg
= cl2osc_page(slice
);
368 struct osc_async_page
*oap
= &opg
->ops_oap
;
369 struct osc_object
*obj
= cl2osc(slice
->cpl_obj
);
370 struct client_obd
*cli
= &osc_export(obj
)->exp_obd
->u
.cli
;
372 return (*printer
)(env
, cookie
, LUSTRE_OSC_NAME
"-page@%p: "
373 "1< %#x %d %u %s %s > "
374 "2< "LPU64
" %u %u %#x %#x | %p %p %p > "
375 "3< %s %p %d %lu %d > "
376 "4< %d %d %d %lu %s | %s %s %s %s > "
377 "5< %s %s %s %s | %d %s | %d %s %s>\n",
380 oap
->oap_magic
, oap
->oap_cmd
,
381 oap
->oap_interrupted
,
382 osc_list(&oap
->oap_pending_item
),
383 osc_list(&oap
->oap_rpc_item
),
385 oap
->oap_obj_off
, oap
->oap_page_off
, oap
->oap_count
,
386 oap
->oap_async_flags
, oap
->oap_brw_flags
,
387 oap
->oap_request
, oap
->oap_cli
, obj
,
389 osc_list(&opg
->ops_inflight
),
390 opg
->ops_submitter
, opg
->ops_transfer_pinned
,
391 osc_submit_duration(opg
), opg
->ops_srvlock
,
393 cli
->cl_r_in_flight
, cli
->cl_w_in_flight
,
394 cli
->cl_max_rpcs_in_flight
,
396 osc_list(&cli
->cl_cache_waiters
),
397 osc_list(&cli
->cl_loi_ready_list
),
398 osc_list(&cli
->cl_loi_hp_ready_list
),
399 osc_list(&cli
->cl_loi_write_list
),
400 osc_list(&cli
->cl_loi_read_list
),
402 osc_list(&obj
->oo_ready_item
),
403 osc_list(&obj
->oo_hp_ready_item
),
404 osc_list(&obj
->oo_write_item
),
405 osc_list(&obj
->oo_read_item
),
406 atomic_read(&obj
->oo_nr_reads
),
407 osc_list(&obj
->oo_reading_exts
),
408 atomic_read(&obj
->oo_nr_writes
),
409 osc_list(&obj
->oo_hp_exts
),
410 osc_list(&obj
->oo_urgent_exts
));
413 static void osc_page_delete(const struct lu_env
*env
,
414 const struct cl_page_slice
*slice
)
416 struct osc_page
*opg
= cl2osc_page(slice
);
417 struct osc_object
*obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
420 LINVRNT(opg
->ops_temp
|| osc_page_protected(env
, opg
, CLM_READ
, 1));
422 CDEBUG(D_TRACE
, "%p\n", opg
);
423 osc_page_transfer_put(env
, opg
);
424 rc
= osc_teardown_async_page(env
, obj
, opg
);
426 CL_PAGE_DEBUG(D_ERROR
, env
, cl_page_top(slice
->cpl_page
),
427 "Trying to teardown failed: %d\n", rc
);
431 spin_lock(&obj
->oo_seatbelt
);
432 if (opg
->ops_submitter
!= NULL
) {
433 LASSERT(!list_empty(&opg
->ops_inflight
));
434 list_del_init(&opg
->ops_inflight
);
435 opg
->ops_submitter
= NULL
;
437 spin_unlock(&obj
->oo_seatbelt
);
439 osc_lru_del(osc_cli(obj
), opg
, true);
442 void osc_page_clip(const struct lu_env
*env
, const struct cl_page_slice
*slice
,
445 struct osc_page
*opg
= cl2osc_page(slice
);
446 struct osc_async_page
*oap
= &opg
->ops_oap
;
448 LINVRNT(osc_page_protected(env
, opg
, CLM_READ
, 0));
450 opg
->ops_from
= from
;
452 spin_lock(&oap
->oap_lock
);
453 oap
->oap_async_flags
|= ASYNC_COUNT_STABLE
;
454 spin_unlock(&oap
->oap_lock
);
457 static int osc_page_cancel(const struct lu_env
*env
,
458 const struct cl_page_slice
*slice
)
460 struct osc_page
*opg
= cl2osc_page(slice
);
463 LINVRNT(osc_page_protected(env
, opg
, CLM_READ
, 0));
465 /* Check if the transferring against this page
466 * is completed, or not even queued. */
467 if (opg
->ops_transfer_pinned
)
468 /* FIXME: may not be interrupted.. */
469 rc
= osc_cancel_async_page(env
, opg
);
470 LASSERT(ergo(rc
== 0, opg
->ops_transfer_pinned
== 0));
474 static int osc_page_flush(const struct lu_env
*env
,
475 const struct cl_page_slice
*slice
,
478 struct osc_page
*opg
= cl2osc_page(slice
);
481 rc
= osc_flush_async_page(env
, io
, opg
);
485 static const struct cl_page_operations osc_page_ops
= {
486 .cpo_fini
= osc_page_fini
,
487 .cpo_print
= osc_page_print
,
488 .cpo_delete
= osc_page_delete
,
489 .cpo_is_under_lock
= osc_page_is_under_lock
,
490 .cpo_disown
= osc_page_disown
,
493 .cpo_cache_add
= osc_page_fail
,
494 .cpo_completion
= osc_page_completion_read
497 .cpo_cache_add
= osc_page_cache_add
,
498 .cpo_completion
= osc_page_completion_write
501 .cpo_clip
= osc_page_clip
,
502 .cpo_cancel
= osc_page_cancel
,
503 .cpo_flush
= osc_page_flush
506 int osc_page_init(const struct lu_env
*env
, struct cl_object
*obj
,
507 struct cl_page
*page
, struct page
*vmpage
)
509 struct osc_object
*osc
= cl2osc(obj
);
510 struct osc_page
*opg
= cl_object_page_slice(obj
, page
);
514 opg
->ops_to
= PAGE_CACHE_SIZE
;
516 result
= osc_prep_async_page(osc
, opg
, vmpage
,
517 cl_offset(obj
, page
->cp_index
));
519 struct osc_io
*oio
= osc_env_io(env
);
520 opg
->ops_srvlock
= osc_io_srvlock(oio
);
521 cl_page_slice_add(page
, &opg
->ops_cl
, obj
,
525 * Cannot assert osc_page_protected() here as read-ahead
526 * creates temporary pages outside of a lock.
528 /* ops_inflight and ops_lru are the same field, but it doesn't
529 * hurt to initialize it twice :-) */
530 INIT_LIST_HEAD(&opg
->ops_inflight
);
531 INIT_LIST_HEAD(&opg
->ops_lru
);
533 /* reserve an LRU space for this page */
534 if (page
->cp_type
== CPT_CACHEABLE
&& result
== 0)
535 result
= osc_lru_reserve(env
, osc
, opg
);
541 * Helper function called by osc_io_submit() for every page in an immediate
542 * transfer (i.e., transferred synchronously).
544 void osc_page_submit(const struct lu_env
*env
, struct osc_page
*opg
,
545 enum cl_req_type crt
, int brw_flags
)
547 struct osc_async_page
*oap
= &opg
->ops_oap
;
548 struct osc_object
*obj
= oap
->oap_obj
;
550 LINVRNT(osc_page_protected(env
, opg
,
551 crt
== CRT_WRITE
? CLM_WRITE
: CLM_READ
, 1));
553 LASSERTF(oap
->oap_magic
== OAP_MAGIC
, "Bad oap magic: oap %p, "
554 "magic 0x%x\n", oap
, oap
->oap_magic
);
555 LASSERT(oap
->oap_async_flags
& ASYNC_READY
);
556 LASSERT(oap
->oap_async_flags
& ASYNC_COUNT_STABLE
);
558 oap
->oap_cmd
= crt
== CRT_WRITE
? OBD_BRW_WRITE
: OBD_BRW_READ
;
559 oap
->oap_page_off
= opg
->ops_from
;
560 oap
->oap_count
= opg
->ops_to
- opg
->ops_from
;
561 oap
->oap_brw_flags
= OBD_BRW_SYNC
| brw_flags
;
563 if (!client_is_remote(osc_export(obj
)) &&
564 cfs_capable(CFS_CAP_SYS_RESOURCE
)) {
565 oap
->oap_brw_flags
|= OBD_BRW_NOQUOTA
;
566 oap
->oap_cmd
|= OBD_BRW_NOQUOTA
;
569 opg
->ops_submit_time
= cfs_time_current();
570 osc_page_transfer_get(opg
, "transfer\0imm");
571 osc_page_transfer_add(env
, opg
, crt
);
574 /* --------------- LRU page management ------------------ */
576 /* OSC is a natural place to manage LRU pages as applications are specialized
577 * to write OSC by OSC. Ideally, if one OSC is used more frequently it should
578 * occupy more LRU slots. On the other hand, we should avoid using up all LRU
579 * slots (client_obd::cl_lru_left) otherwise process has to be put into sleep
580 * for free LRU slots - this will be very bad so the algorithm requires each
581 * OSC to free slots voluntarily to maintain a reasonable number of free slots
585 static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq
);
586 static atomic_t osc_lru_waiters
= ATOMIC_INIT(0);
587 /* LRU pages are freed in batch mode. OSC should at least free this
588 * number of pages to avoid running out of LRU budget, and.. */
589 static const int lru_shrink_min
= 2 << (20 - PAGE_CACHE_SHIFT
); /* 2M */
590 /* free this number at most otherwise it will take too long time to finsih. */
591 static const int lru_shrink_max
= 32 << (20 - PAGE_CACHE_SHIFT
); /* 32M */
593 /* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
594 * we should free slots aggressively. In this way, slots are freed in a steady
595 * step to maintain fairness among OSCs.
597 * Return how many LRU pages should be freed. */
598 static int osc_cache_too_much(struct client_obd
*cli
)
600 struct cl_client_cache
*cache
= cli
->cl_cache
;
601 int pages
= atomic_read(&cli
->cl_lru_in_list
) >> 1;
603 if (atomic_read(&osc_lru_waiters
) > 0 &&
604 atomic_read(cli
->cl_lru_left
) < lru_shrink_max
)
605 /* drop lru pages aggressively */
606 return min(pages
, lru_shrink_max
);
608 /* if it's going to run out LRU slots, we should free some, but not
609 * too much to maintain fairness among OSCs. */
610 if (atomic_read(cli
->cl_lru_left
) < cache
->ccc_lru_max
>> 4) {
613 tmp
= cache
->ccc_lru_max
/ atomic_read(&cache
->ccc_users
);
615 return min(pages
, lru_shrink_max
);
617 return pages
> lru_shrink_min
? lru_shrink_min
: 0;
623 /* Return how many pages are not discarded in @pvec. */
624 static int discard_pagevec(const struct lu_env
*env
, struct cl_io
*io
,
625 struct cl_page
**pvec
, int max_index
)
630 for (count
= 0, i
= 0; i
< max_index
; i
++) {
631 struct cl_page
*page
= pvec
[i
];
632 if (cl_page_own_try(env
, io
, page
) == 0) {
633 /* free LRU page only if nobody is using it.
634 * This check is necessary to avoid freeing the pages
635 * having already been removed from LRU and pinned
637 if (!cl_page_in_use(page
)) {
638 cl_page_unmap(env
, io
, page
);
639 cl_page_discard(env
, io
, page
);
642 cl_page_disown(env
, io
, page
);
644 cl_page_put(env
, page
);
647 return max_index
- count
;
651 * Drop @target of pages from LRU at most.
653 int osc_lru_shrink(struct client_obd
*cli
, int target
)
655 struct cl_env_nest nest
;
658 struct cl_object
*clobj
= NULL
;
659 struct cl_page
**pvec
;
660 struct osc_page
*opg
;
666 LASSERT(atomic_read(&cli
->cl_lru_in_list
) >= 0);
667 if (atomic_read(&cli
->cl_lru_in_list
) == 0 || target
<= 0)
670 env
= cl_env_nested_get(&nest
);
674 pvec
= osc_env_info(env
)->oti_pvec
;
675 io
= &osc_env_info(env
)->oti_io
;
677 client_obd_list_lock(&cli
->cl_lru_list_lock
);
678 atomic_inc(&cli
->cl_lru_shrinkers
);
679 maxscan
= min(target
<< 1, atomic_read(&cli
->cl_lru_in_list
));
680 while (!list_empty(&cli
->cl_lru_list
)) {
681 struct cl_page
*page
;
686 opg
= list_entry(cli
->cl_lru_list
.next
, struct osc_page
,
688 page
= cl_page_top(opg
->ops_cl
.cpl_page
);
689 if (cl_page_in_use_noref(page
)) {
690 list_move_tail(&opg
->ops_lru
, &cli
->cl_lru_list
);
694 LASSERT(page
->cp_obj
!= NULL
);
695 if (clobj
!= page
->cp_obj
) {
696 struct cl_object
*tmp
= page
->cp_obj
;
699 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
702 count
-= discard_pagevec(env
, io
, pvec
, index
);
706 cl_object_put(env
, clobj
);
712 io
->ci_ignore_layout
= 1;
713 rc
= cl_io_init(env
, io
, CIT_MISC
, clobj
);
715 client_obd_list_lock(&cli
->cl_lru_list_lock
);
724 /* move this page to the end of list as it will be discarded
725 * soon. The page will be finally removed from LRU list in
726 * osc_page_delete(). */
727 list_move_tail(&opg
->ops_lru
, &cli
->cl_lru_list
);
729 /* it's okay to grab a refcount here w/o holding lock because
730 * it has to grab cl_lru_list_lock to delete the page. */
732 pvec
[index
++] = page
;
733 if (++count
>= target
)
736 if (unlikely(index
== OTI_PVEC_SIZE
)) {
737 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
738 count
-= discard_pagevec(env
, io
, pvec
, index
);
741 client_obd_list_lock(&cli
->cl_lru_list_lock
);
744 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
747 count
-= discard_pagevec(env
, io
, pvec
, index
);
750 cl_object_put(env
, clobj
);
752 cl_env_nested_put(&nest
, env
);
754 atomic_dec(&cli
->cl_lru_shrinkers
);
755 return count
> 0 ? count
: rc
;
758 static void osc_lru_add(struct client_obd
*cli
, struct osc_page
*opg
)
762 if (!opg
->ops_in_lru
)
765 atomic_dec(&cli
->cl_lru_busy
);
766 client_obd_list_lock(&cli
->cl_lru_list_lock
);
767 if (list_empty(&opg
->ops_lru
)) {
768 list_move_tail(&opg
->ops_lru
, &cli
->cl_lru_list
);
769 atomic_inc_return(&cli
->cl_lru_in_list
);
770 wakeup
= atomic_read(&osc_lru_waiters
) > 0;
772 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
775 osc_lru_shrink(cli
, osc_cache_too_much(cli
));
776 wake_up_all(&osc_lru_waitq
);
780 /* Delete a page from the LRU list. A page can be deleted from the LRU list
781 * for two reasons: it was redirtied, or it was deleted from the page cache. */
782 static void osc_lru_del(struct client_obd
*cli
, struct osc_page
*opg
, bool del
)
784 if (opg
->ops_in_lru
) {
785 client_obd_list_lock(&cli
->cl_lru_list_lock
);
786 if (!list_empty(&opg
->ops_lru
)) {
787 LASSERT(atomic_read(&cli
->cl_lru_in_list
) > 0);
788 list_del_init(&opg
->ops_lru
);
789 atomic_dec(&cli
->cl_lru_in_list
);
791 atomic_inc(&cli
->cl_lru_busy
);
793 LASSERT(atomic_read(&cli
->cl_lru_busy
) > 0);
794 atomic_dec(&cli
->cl_lru_busy
);
796 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
798 atomic_inc(cli
->cl_lru_left
);
799 /* this is a great place to release more LRU pages if
800 * this osc occupies too many LRU pages and kernel is
801 * stealing one of them.
802 * cl_lru_shrinkers is to avoid recursive call in case
803 * we're already in the context of osc_lru_shrink(). */
804 if (atomic_read(&cli
->cl_lru_shrinkers
) == 0 &&
805 !memory_pressure_get())
806 osc_lru_shrink(cli
, osc_cache_too_much(cli
));
807 wake_up(&osc_lru_waitq
);
810 LASSERT(list_empty(&opg
->ops_lru
));
814 static inline int max_to_shrink(struct client_obd
*cli
)
816 return min(atomic_read(&cli
->cl_lru_in_list
) >> 1, lru_shrink_max
);
819 static int osc_lru_reclaim(struct client_obd
*cli
)
821 struct cl_client_cache
*cache
= cli
->cl_cache
;
825 LASSERT(cache
!= NULL
);
826 LASSERT(!list_empty(&cache
->ccc_lru
));
828 rc
= osc_lru_shrink(cli
, lru_shrink_min
);
830 CDEBUG(D_CACHE
, "%s: Free %d pages from own LRU: %p.\n",
831 cli
->cl_import
->imp_obd
->obd_name
, rc
, cli
);
835 CDEBUG(D_CACHE
, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
836 cli
->cl_import
->imp_obd
->obd_name
, cli
,
837 atomic_read(&cli
->cl_lru_in_list
),
838 atomic_read(&cli
->cl_lru_busy
));
840 /* Reclaim LRU slots from other client_obd as it can't free enough
841 * from its own. This should rarely happen. */
842 spin_lock(&cache
->ccc_lru_lock
);
843 cache
->ccc_lru_shrinkers
++;
844 list_move_tail(&cli
->cl_lru_osc
, &cache
->ccc_lru
);
846 max_scans
= atomic_read(&cache
->ccc_users
);
847 while (--max_scans
> 0 && !list_empty(&cache
->ccc_lru
)) {
848 cli
= list_entry(cache
->ccc_lru
.next
, struct client_obd
,
851 CDEBUG(D_CACHE
, "%s: cli %p LRU pages: %d, busy: %d.\n",
852 cli
->cl_import
->imp_obd
->obd_name
, cli
,
853 atomic_read(&cli
->cl_lru_in_list
),
854 atomic_read(&cli
->cl_lru_busy
));
856 list_move_tail(&cli
->cl_lru_osc
, &cache
->ccc_lru
);
857 if (atomic_read(&cli
->cl_lru_in_list
) > 0) {
858 spin_unlock(&cache
->ccc_lru_lock
);
860 rc
= osc_lru_shrink(cli
, max_to_shrink(cli
));
861 spin_lock(&cache
->ccc_lru_lock
);
866 spin_unlock(&cache
->ccc_lru_lock
);
868 CDEBUG(D_CACHE
, "%s: cli %p freed %d pages.\n",
869 cli
->cl_import
->imp_obd
->obd_name
, cli
, rc
);
873 static int osc_lru_reserve(const struct lu_env
*env
, struct osc_object
*obj
,
874 struct osc_page
*opg
)
876 struct l_wait_info lwi
= LWI_INTR(LWI_ON_SIGNAL_NOOP
, NULL
);
877 struct client_obd
*cli
= osc_cli(obj
);
880 if (cli
->cl_cache
== NULL
) /* shall not be in LRU */
883 LASSERT(atomic_read(cli
->cl_lru_left
) >= 0);
884 while (!atomic_add_unless(cli
->cl_lru_left
, -1, 0)) {
887 /* run out of LRU spaces, try to drop some by itself */
888 rc
= osc_lru_reclaim(cli
);
896 /* slowest case, all of caching pages are busy, notifying
897 * other OSCs that we are short of LRU slots. */
898 atomic_inc(&osc_lru_waiters
);
900 gen
= atomic_read(&cli
->cl_lru_in_list
);
901 rc
= l_wait_event(osc_lru_waitq
,
902 atomic_read(cli
->cl_lru_left
) > 0 ||
903 (atomic_read(&cli
->cl_lru_in_list
) > 0 &&
904 gen
!= atomic_read(&cli
->cl_lru_in_list
)),
907 atomic_dec(&osc_lru_waiters
);
913 atomic_inc(&cli
->cl_lru_busy
);