4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2015 Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * Internal interfaces of LOV layer.
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
39 * Author: Jinshan Xiong <jinshan.xiong@intel.com>
42 #ifndef LOV_CL_INTERNAL_H
43 #define LOV_CL_INTERNAL_H
45 #include "../../include/linux/libcfs/libcfs.h"
47 #include "../include/obd.h"
48 #include "../include/cl_object.h"
49 #include "lov_internal.h"
52 * Logical object volume layer. This layer implements data striping (raid0).
54 * At the lov layer top-entity (object, page, lock, io) is connected to one or
55 * more sub-entities: top-object, representing a file is connected to a set of
56 * sub-objects, each representing a stripe, file-level top-lock is connected
57 * to a set of per-stripe sub-locks, top-page is connected to a (single)
58 * sub-page, and a top-level IO is connected to a set of (potentially
59 * concurrent) sub-IO's.
61 * Sub-object, sub-page, and sub-io have well-defined top-object and top-page
62 * respectively, while a single sub-lock can be part of multiple top-locks.
64 * Reference counting models are different for different types of entities:
66 * - top-object keeps a reference to its sub-objects, and destroys them
67 * when it is destroyed.
69 * - top-page keeps a reference to its sub-page, and destroys it when it
72 * - IO's are not reference counted.
74 * To implement a connection between top and sub entities, lov layer is split
75 * into two pieces: lov ("upper half"), and lovsub ("bottom half"), both
76 * implementing full set of cl-interfaces. For example, top-object has vvp and
77 * lov layers, and it's sub-object has lovsub and osc layers. lovsub layer is
78 * used to track child-parent relationship.
/**
 * Device-level state flags (presumably stored in a lov_device flags word —
 * the field itself is declared elsewhere; verify against struct lov_device).
 */
enum lov_device_flags {
	LOV_DEV_INITIALIZED = 1 << 0
};
96 * Resources that are used in memory-cleaning path, and whose allocation
97 * cannot fail even when memory is tight. They are preallocated in sufficient
98 * quantities in lov_device::ld_emerg[], and access to them is serialized
99 * lov_device::ld_mutex.
101 struct lov_device_emerg
{
103 * Page list used to submit IO when memory is in pressure.
105 struct cl_page_list emrg_page_list
;
107 * sub-io's shared by all threads accessing this device when memory is
108 * too low to allocate sub-io's dynamically.
110 struct cl_io emrg_subio
;
112 * Environments used by sub-io's in
113 * lov_device_emerg::emrg_subio.
115 struct lu_env
*emrg_env
;
117 * Refchecks for lov_device_emerg::emrg_env.
126 * XXX Locking of lov-private data is missing.
128 struct cl_device ld_cl
;
129 struct lov_obd
*ld_lov
;
130 /** size of lov_device::ld_target[] array */
132 struct lovsub_device
**ld_target
;
135 /** Emergency resources used in memory-cleansing paths. */
136 struct lov_device_emerg
**ld_emrg
;
138 * Serializes access to lov_device::ld_emrg in low-memory
141 struct mutex ld_mutex
;
147 enum lov_layout_type
{
148 LLT_EMPTY
, /** empty file without body (mknod + truncate) */
149 LLT_RAID0
, /** striped file */
150 LLT_RELEASED
, /** file with no objects (data in HSM) */
154 static inline char *llt2str(enum lov_layout_type llt
)
171 * lov-specific file state.
173 * lov object has particular layout type, determining how top-object is built
174 * on top of sub-objects. Layout type can change dynamically. When this
175 * happens, lov_object::lo_type_guard semaphore is taken in exclusive mode,
176 * all state pertaining to the old layout type is destroyed, and new state is
177 * constructed. All object methods take said semaphore in the shared mode,
178 * providing serialization against transition between layout types.
180 * To avoid multiple `if' or `switch' statements, selecting behavior for the
181 * current layout type, object methods perform double-dispatch, invoking
182 * function corresponding to the current layout type.
185 struct cl_object lo_cl
;
187 * Serializes object operations with transitions between layout types.
189 * This semaphore is taken in shared mode by all object methods, and
190 * is taken in exclusive mode when object type is changed.
192 * \see lov_object::lo_type
194 struct rw_semaphore lo_type_guard
;
196 * Type of an object. Protected by lov_object::lo_type_guard.
198 enum lov_layout_type lo_type
;
200 * True if layout is invalid. This bit is cleared when layout lock
203 bool lo_layout_invalid
;
205 * How many IOs are on going on this object. Layout can be changed
206 * only if there is no active IO.
208 atomic_t lo_active_ios
;
210 * Waitq - wait for no one else is using lo_lsm
212 wait_queue_head_t lo_waitq
;
214 * Layout metadata. NULL if empty layout.
216 struct lov_stripe_md
*lo_lsm
;
218 union lov_layout_state
{
219 struct lov_layout_raid0
{
222 * When this is true, lov_object::lo_attr contains
223 * valid up to date attributes for a top-level
224 * object. This field is reset to 0 when attributes of
225 * any sub-object change.
229 * Array of sub-objects. Allocated when top-object is
230 * created (lov_init_raid0()).
232 * Top-object is a strict master of its sub-objects:
233 * it is created before them, and outlives its
234 * children (this later is necessary so that basic
235 * functions like cl_object_top() always
236 * work). Top-object keeps a reference on every
239 * When top-object is destroyed (lov_delete_raid0())
240 * it releases its reference to a sub-object and waits
241 * until the latter is finally destroyed.
243 struct lovsub_object
**lo_sub
;
247 spinlock_t lo_sub_lock
;
249 * Cached object attribute, built from sub-object
252 struct cl_attr lo_attr
;
254 struct lov_layout_state_empty
{
256 struct lov_layout_state_released
{
260 * Thread that acquired lov_object::lo_type_guard in an exclusive
263 struct task_struct
*lo_owner
;
267 * State lov_lock keeps for each sub-lock.
269 struct lov_lock_sub
{
270 /** sub-lock itself */
271 struct cl_lock sub_lock
;
272 /** Set if the sublock has ever been enqueued, meaning it may
273 * hold resources of underlying layers
275 unsigned int sub_is_enqueued
:1,
281 * lov-specific lock state.
284 struct cl_lock_slice lls_cl
;
285 /** Number of sub-locks in this lock */
288 struct lov_lock_sub lls_sub
[0];
292 struct cl_page_slice lps_cl
;
293 unsigned int lps_stripe
; /* stripe index */
300 struct lovsub_device
{
301 struct cl_device acid_cl
;
302 struct lov_device
*acid_super
;
304 struct cl_device
*acid_next
;
307 struct lovsub_object
{
308 struct cl_object_header lso_header
;
309 struct cl_object lso_cl
;
310 struct lov_object
*lso_super
;
315 * A link between a top-lock and a sub-lock. Separate data-structure is
316 * necessary, because top-locks and sub-locks are in M:N relationship.
318 * \todo This can be optimized for a (by far) most frequent case of a single
319 * top-lock per sub-lock.
321 struct lov_lock_link
{
322 struct lov_lock
*lll_super
;
323 /** An index within parent lock. */
326 * A linkage into per sub-lock list of all corresponding top-locks,
327 * hanging off lovsub_lock::lss_parents.
329 struct list_head lll_list
;
333 * Lock state at lovsub layer.
336 struct cl_lock_slice lss_cl
;
338 * List of top-locks that have given sub-lock as their part. Protected
339 * by cl_lock::cll_guard mutex.
341 struct list_head lss_parents
;
343 * Top-lock that initiated current operation on this sub-lock. This is
344 * only set during top-to-bottom lock operations like enqueue, and is
345 * used to optimize state change notification. Protected by
346 * cl_lock::cll_guard mutex.
348 * \see lovsub_lock_state_one().
350 struct cl_lock
*lss_active
;
/**
 * Describe the environment settings for sublocks.
 */
struct lov_sublock_env {
	/** Environment in which the sub-lock operation runs. */
	const struct lu_env *lse_env;
	/** IO context associated with the sub-lock operation. */
	struct cl_io *lse_io;
	/** Sub-io state this environment belongs to. */
	struct lov_io_sub *lse_sub;
};
363 struct cl_page_slice lsb_cl
;
366 struct lov_thread_info
{
367 struct cl_object_conf lti_stripe_conf
;
368 struct lu_fid lti_fid
;
369 struct cl_lock_descr lti_ldescr
;
370 struct ost_lvb lti_lvb
;
371 struct cl_2queue lti_cl2q
;
372 struct cl_page_list lti_plist
;
373 wait_queue_t lti_waiter
;
374 struct cl_attr lti_attr
;
378 * State that lov_io maintains for every sub-io.
383 * sub-io for a stripe. Ideally sub-io's can be stopped and resumed
384 * independently, with lov acting as a scheduler to maximize overall
387 struct cl_io
*sub_io
;
389 * Linkage into a list (hanging off lov_io::lis_active) of all
390 * sub-io's active for the current IO iteration.
392 struct list_head sub_linkage
;
394 * true, iff cl_io_init() was successfully executed against
395 * lov_io_sub::sub_io.
397 int sub_io_initialized
;
399 * True, iff lov_io_sub::sub_io and lov_io_sub::sub_env weren't
400 * allocated, but borrowed from a per-device emergency pool.
404 * environment, in which sub-io executes.
406 struct lu_env
*sub_env
;
408 * environment's refcheck.
419 * IO state private for LOV.
423 struct cl_io_slice lis_cl
;
425 * Pointer to the object slice. This is a duplicate of
426 * lov_io::lis_cl::cis_object.
428 struct lov_object
*lis_object
;
430 * Original end-of-io position for this IO, set by the upper layer as
431 * cl_io::u::ci_rw::pos + cl_io::u::ci_rw::count. lov remembers this,
432 * changes pos and count to fit IO into a single stripe and uses saved
433 * value to determine when IO iterations have to stop.
435 * This is used only for CIT_READ and CIT_WRITE io's.
437 loff_t lis_io_endpos
;
440 * starting position within a file, for the current io loop iteration
441 * (stripe), used by ci_io_loop().
445 * end position with in a file, for the current stripe io. This is
446 * exclusive (i.e., next offset after last byte affected by io).
451 int lis_stripe_count
;
452 int lis_active_subios
;
455 * the index of ls_single_subio in ls_subios array
457 int lis_single_subio_index
;
458 struct cl_io lis_single_subio
;
461 * size of ls_subios array, actually the highest stripe #
464 struct lov_io_sub
*lis_subs
;
466 * List of active sub-io's.
468 struct list_head lis_active
;
473 struct lov_sublock_env ls_subenv
;
477 * State of transfer for lov.
480 struct cl_req_slice lr_cl
;
484 * State of transfer for lovsub.
487 struct cl_req_slice lsrq_cl
;
/* Device types for the upper (lov) and lower (lovsub) halves. */
extern struct lu_device_type lov_device_type;
extern struct lu_device_type lovsub_device_type;

/* Context keys for per-thread (lov_key) and per-session state. */
extern struct lu_context_key lov_key;
extern struct lu_context_key lov_session_key;

/* Slab caches for lov-layer objects. */
extern struct kmem_cache *lov_lock_kmem;
extern struct kmem_cache *lov_object_kmem;
extern struct kmem_cache *lov_thread_kmem;
extern struct kmem_cache *lov_session_kmem;
extern struct kmem_cache *lov_req_kmem;

/* Slab caches for lovsub-layer objects. */
extern struct kmem_cache *lovsub_lock_kmem;
extern struct kmem_cache *lovsub_object_kmem;
extern struct kmem_cache *lovsub_req_kmem;

extern struct kmem_cache *lov_lock_link_kmem;
/* Object and lock initializers for the lov layer. */
int lov_object_init(const struct lu_env *env, struct lu_object *obj,
		    const struct lu_object_conf *conf);
int lovsub_object_init(const struct lu_env *env, struct lu_object *obj,
		       const struct lu_object_conf *conf);
int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
		  struct cl_lock *lock, const struct cl_io *io);
514 int lov_io_init(const struct lu_env
*env
, struct cl_object
*obj
,
/* Per-layout-type lock initializers. */
int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
		     struct cl_lock *lock, const struct cl_io *io);
int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
			struct cl_lock *lock, const struct cl_io *io);
int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
			struct cl_lock *lock, const struct cl_io *io);
523 int lov_io_init_raid0(const struct lu_env
*env
, struct cl_object
*obj
,
525 int lov_io_init_empty(const struct lu_env
*env
, struct cl_object
*obj
,
527 int lov_io_init_released(const struct lu_env
*env
, struct cl_object
*obj
,
/* Detach the top-lock/sub-lock link \a link from sub-lock \a sub. */
void lov_lock_unlink(const struct lu_env *env, struct lov_lock_link *link,
		     struct lovsub_lock *sub);
532 struct lov_io_sub
*lov_sub_get(const struct lu_env
*env
, struct lov_io
*lio
,
/* Release a sub-io previously obtained from lov_sub_get(). */
void lov_sub_put(struct lov_io_sub *sub);
/* Update sub-lock \a sublock of top-lock \a lov with descriptor \a d. */
int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
		       struct lovsub_lock *sublock,
		       const struct cl_lock_descr *d, int idx);
539 int lov_page_init(const struct lu_env
*env
, struct cl_object
*ob
,
540 struct cl_page
*page
, pgoff_t index
);
541 int lovsub_page_init(const struct lu_env
*env
, struct cl_object
*ob
,
542 struct cl_page
*page
, pgoff_t index
);
543 int lov_page_init_empty(const struct lu_env
*env
, struct cl_object
*obj
,
544 struct cl_page
*page
, pgoff_t index
);
545 int lov_page_init_raid0(const struct lu_env
*env
, struct cl_object
*obj
,
546 struct cl_page
*page
, pgoff_t index
);
547 struct lu_object
*lov_object_alloc(const struct lu_env
*env
,
548 const struct lu_object_header
*hdr
,
549 struct lu_device
*dev
);
550 struct lu_object
*lovsub_object_alloc(const struct lu_env
*env
,
551 const struct lu_object_header
*hdr
,
552 struct lu_device
*dev
);
554 struct lov_lock_link
*lov_lock_link_find(const struct lu_env
*env
,
555 struct lov_lock
*lck
,
556 struct lovsub_lock
*sub
);
557 struct lov_io_sub
*lov_page_subio(const struct lu_env
*env
, struct lov_io
*lio
,
558 const struct cl_page_slice
*slice
);
559 int lov_page_stripe(const struct cl_page
*page
);
/** Iterate \a var over the indices of all targets of lov device \a lov. */
#define lov_foreach_target(lov, var) \
	for (var = 0; var < lov_targets_nr(lov); ++var)
564 /*****************************************************************************
572 static inline struct lov_session
*lov_env_session(const struct lu_env
*env
)
574 struct lov_session
*ses
;
576 ses
= lu_context_key_get(env
->le_ses
, &lov_session_key
);
581 static inline struct lov_io
*lov_env_io(const struct lu_env
*env
)
583 return &lov_env_session(env
)->ls_io
;
586 static inline int lov_is_object(const struct lu_object
*obj
)
588 return obj
->lo_dev
->ld_type
== &lov_device_type
;
591 static inline int lovsub_is_object(const struct lu_object
*obj
)
593 return obj
->lo_dev
->ld_type
== &lovsub_device_type
;
596 static inline struct lu_device
*lov2lu_dev(struct lov_device
*lov
)
598 return &lov
->ld_cl
.cd_lu_dev
;
601 static inline struct lov_device
*lu2lov_dev(const struct lu_device
*d
)
603 LINVRNT(d
->ld_type
== &lov_device_type
);
604 return container_of0(d
, struct lov_device
, ld_cl
.cd_lu_dev
);
607 static inline struct cl_device
*lovsub2cl_dev(struct lovsub_device
*lovsub
)
609 return &lovsub
->acid_cl
;
612 static inline struct lu_device
*lovsub2lu_dev(struct lovsub_device
*lovsub
)
614 return &lovsub2cl_dev(lovsub
)->cd_lu_dev
;
617 static inline struct lovsub_device
*lu2lovsub_dev(const struct lu_device
*d
)
619 LINVRNT(d
->ld_type
== &lovsub_device_type
);
620 return container_of0(d
, struct lovsub_device
, acid_cl
.cd_lu_dev
);
623 static inline struct lovsub_device
*cl2lovsub_dev(const struct cl_device
*d
)
625 LINVRNT(d
->cd_lu_dev
.ld_type
== &lovsub_device_type
);
626 return container_of0(d
, struct lovsub_device
, acid_cl
);
629 static inline struct lu_object
*lov2lu(struct lov_object
*lov
)
631 return &lov
->lo_cl
.co_lu
;
634 static inline struct cl_object
*lov2cl(struct lov_object
*lov
)
639 static inline struct lov_object
*lu2lov(const struct lu_object
*obj
)
641 LINVRNT(lov_is_object(obj
));
642 return container_of0(obj
, struct lov_object
, lo_cl
.co_lu
);
645 static inline struct lov_object
*cl2lov(const struct cl_object
*obj
)
647 LINVRNT(lov_is_object(&obj
->co_lu
));
648 return container_of0(obj
, struct lov_object
, lo_cl
);
651 static inline struct lu_object
*lovsub2lu(struct lovsub_object
*los
)
653 return &los
->lso_cl
.co_lu
;
656 static inline struct cl_object
*lovsub2cl(struct lovsub_object
*los
)
661 static inline struct lovsub_object
*cl2lovsub(const struct cl_object
*obj
)
663 LINVRNT(lovsub_is_object(&obj
->co_lu
));
664 return container_of0(obj
, struct lovsub_object
, lso_cl
);
667 static inline struct lovsub_object
*lu2lovsub(const struct lu_object
*obj
)
669 LINVRNT(lovsub_is_object(obj
));
670 return container_of0(obj
, struct lovsub_object
, lso_cl
.co_lu
);
673 static inline struct lovsub_lock
*
674 cl2lovsub_lock(const struct cl_lock_slice
*slice
)
676 LINVRNT(lovsub_is_object(&slice
->cls_obj
->co_lu
));
677 return container_of(slice
, struct lovsub_lock
, lss_cl
);
680 static inline struct lovsub_lock
*cl2sub_lock(const struct cl_lock
*lock
)
682 const struct cl_lock_slice
*slice
;
684 slice
= cl_lock_at(lock
, &lovsub_device_type
);
686 return cl2lovsub_lock(slice
);
689 static inline struct lov_lock
*cl2lov_lock(const struct cl_lock_slice
*slice
)
691 LINVRNT(lov_is_object(&slice
->cls_obj
->co_lu
));
692 return container_of(slice
, struct lov_lock
, lls_cl
);
695 static inline struct lov_page
*cl2lov_page(const struct cl_page_slice
*slice
)
697 LINVRNT(lov_is_object(&slice
->cpl_obj
->co_lu
));
698 return container_of0(slice
, struct lov_page
, lps_cl
);
701 static inline struct lov_req
*cl2lov_req(const struct cl_req_slice
*slice
)
703 return container_of0(slice
, struct lov_req
, lr_cl
);
706 static inline struct lovsub_page
*
707 cl2lovsub_page(const struct cl_page_slice
*slice
)
709 LINVRNT(lovsub_is_object(&slice
->cpl_obj
->co_lu
));
710 return container_of0(slice
, struct lovsub_page
, lsb_cl
);
713 static inline struct lovsub_req
*cl2lovsub_req(const struct cl_req_slice
*slice
)
715 return container_of0(slice
, struct lovsub_req
, lsrq_cl
);
718 static inline struct lov_io
*cl2lov_io(const struct lu_env
*env
,
719 const struct cl_io_slice
*ios
)
723 lio
= container_of(ios
, struct lov_io
, lis_cl
);
724 LASSERT(lio
== lov_env_io(env
));
728 static inline int lov_targets_nr(const struct lov_device
*lov
)
730 return lov
->ld_lov
->desc
.ld_tgt_count
;
733 static inline struct lov_thread_info
*lov_env_info(const struct lu_env
*env
)
735 struct lov_thread_info
*info
;
737 info
= lu_context_key_get(&env
->le_ctx
, &lov_key
);
742 static inline struct lov_layout_raid0
*lov_r0(struct lov_object
*lov
)
744 LASSERT(lov
->lo_type
== LLT_RAID0
);
745 LASSERT(lov
->lo_lsm
->lsm_wire
.lw_magic
== LOV_MAGIC
||
746 lov
->lo_lsm
->lsm_wire
.lw_magic
== LOV_MAGIC_V3
);
747 return &lov
->u
.raid0
;