4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LMV
38 #include <linux/slab.h>
39 #include <linux/module.h>
40 #include <linux/init.h>
41 #include <linux/pagemap.h>
43 #include <asm/div64.h>
44 #include <linux/seq_file.h>
45 #include <linux/namei.h>
46 #include <linux/uaccess.h>
48 #include "../include/lustre/lustre_idl.h"
49 #include "../include/obd_support.h"
50 #include "../include/lustre_lib.h"
51 #include "../include/lustre_net.h"
52 #include "../include/obd_class.h"
53 #include "../include/lprocfs_status.h"
54 #include "../include/lustre_lite.h"
55 #include "../include/lustre_fid.h"
56 #include "../include/lustre_kernelcomm.h"
57 #include "lmv_internal.h"
59 static void lmv_activate_target(struct lmv_obd
*lmv
,
60 struct lmv_tgt_desc
*tgt
,
63 if (tgt
->ltd_active
== activate
)
66 tgt
->ltd_active
= activate
;
67 lmv
->desc
.ld_active_tgt_count
+= (activate
? 1 : -1);
73 * -EINVAL : UUID can't be found in the LMV's target list
74 * -ENOTCONN: The UUID is found, but the target connection is bad (!)
75 * -EBADF : The UUID is found, but the OBD of the wrong type (!)
77 static int lmv_set_mdc_active(struct lmv_obd
*lmv
, struct obd_uuid
*uuid
,
80 struct lmv_tgt_desc
*uninitialized_var(tgt
);
81 struct obd_device
*obd
;
85 CDEBUG(D_INFO
, "Searching in lmv %p for uuid %s (activate=%d)\n",
86 lmv
, uuid
->uuid
, activate
);
88 spin_lock(&lmv
->lmv_lock
);
89 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
91 if (!tgt
|| !tgt
->ltd_exp
)
94 CDEBUG(D_INFO
, "Target idx %d is %s conn %#llx\n", i
,
95 tgt
->ltd_uuid
.uuid
, tgt
->ltd_exp
->exp_handle
.h_cookie
);
97 if (obd_uuid_equals(uuid
, &tgt
->ltd_uuid
))
101 if (i
== lmv
->desc
.ld_tgt_count
) {
106 obd
= class_exp2obd(tgt
->ltd_exp
);
112 CDEBUG(D_INFO
, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
113 obd
->obd_name
, obd
->obd_uuid
.uuid
, obd
->obd_minor
, obd
,
114 obd
->obd_type
->typ_name
, i
);
115 LASSERT(strcmp(obd
->obd_type
->typ_name
, LUSTRE_MDC_NAME
) == 0);
117 if (tgt
->ltd_active
== activate
) {
118 CDEBUG(D_INFO
, "OBD %p already %sactive!\n", obd
,
119 activate
? "" : "in");
123 CDEBUG(D_INFO
, "Marking OBD %p %sactive\n", obd
,
124 activate
? "" : "in");
125 lmv_activate_target(lmv
, tgt
, activate
);
128 spin_unlock(&lmv
->lmv_lock
);
132 static struct obd_uuid
*lmv_get_uuid(struct obd_export
*exp
)
134 struct lmv_obd
*lmv
= &exp
->exp_obd
->u
.lmv
;
136 return obd_get_uuid(lmv
->tgts
[0]->ltd_exp
);
139 static int lmv_notify(struct obd_device
*obd
, struct obd_device
*watched
,
140 enum obd_notify_event ev
, void *data
)
142 struct obd_connect_data
*conn_data
;
143 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
144 struct obd_uuid
*uuid
;
147 if (strcmp(watched
->obd_type
->typ_name
, LUSTRE_MDC_NAME
)) {
148 CERROR("unexpected notification of %s %s!\n",
149 watched
->obd_type
->typ_name
,
154 uuid
= &watched
->u
.cli
.cl_target_uuid
;
155 if (ev
== OBD_NOTIFY_ACTIVE
|| ev
== OBD_NOTIFY_INACTIVE
) {
157 * Set MDC as active before notifying the observer, so the
158 * observer can use the MDC normally.
160 rc
= lmv_set_mdc_active(lmv
, uuid
,
161 ev
== OBD_NOTIFY_ACTIVE
);
163 CERROR("%sactivation of %s failed: %d\n",
164 ev
== OBD_NOTIFY_ACTIVE
? "" : "de",
168 } else if (ev
== OBD_NOTIFY_OCD
) {
169 conn_data
= &watched
->u
.cli
.cl_import
->imp_connect_data
;
171 * XXX: Make sure that ocd_connect_flags from all targets are
172 * the same. Otherwise one of MDTs runs wrong version or
173 * something like this. --umka
175 obd
->obd_self_export
->exp_connect_data
= *conn_data
;
178 else if (ev
== OBD_NOTIFY_DISCON
) {
180 * For disconnect event, flush fld cache for failout MDS case.
182 fld_client_flush(&lmv
->lmv_fld
);
186 * Pass the notification up the chain.
188 if (obd
->obd_observer
)
189 rc
= obd_notify(obd
->obd_observer
, watched
, ev
, data
);
195 * This is fake connect function. Its purpose is to initialize lmv and say
196 * caller that everything is okay. Real connection will be performed later.
198 static int lmv_connect(const struct lu_env
*env
,
199 struct obd_export
**exp
, struct obd_device
*obd
,
200 struct obd_uuid
*cluuid
, struct obd_connect_data
*data
,
203 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
204 struct lustre_handle conn
= { 0 };
208 * We don't want to actually do the underlying connections more than
209 * once, so keep track.
212 if (lmv
->refcount
> 1) {
217 rc
= class_connect(&conn
, obd
, cluuid
);
219 CERROR("class_connection() returned %d\n", rc
);
223 *exp
= class_conn2export(&conn
);
224 class_export_get(*exp
);
228 lmv
->cluuid
= *cluuid
;
231 lmv
->conn_data
= *data
;
233 lmv
->lmv_tgts_kobj
= kobject_create_and_add("target_obds",
236 * All real clients should perform actual connection right away, because
237 * it is possible, that LMV will not have opportunity to connect targets
238 * and MDC stuff will be called directly, for instance while reading
239 * ../mdc/../kbytesfree procfs file, etc.
241 if (data
&& data
->ocd_connect_flags
& OBD_CONNECT_REAL
)
242 rc
= lmv_check_connect(obd
);
244 if (rc
&& lmv
->lmv_tgts_kobj
)
245 kobject_put(lmv
->lmv_tgts_kobj
);
250 static void lmv_set_timeouts(struct obd_device
*obd
)
252 struct lmv_tgt_desc
*tgt
;
257 if (lmv
->server_timeout
== 0)
260 if (lmv
->connected
== 0)
263 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
265 if (!tgt
|| !tgt
->ltd_exp
|| tgt
->ltd_active
== 0)
268 obd_set_info_async(NULL
, tgt
->ltd_exp
, sizeof(KEY_INTERMDS
),
269 KEY_INTERMDS
, 0, NULL
, NULL
);
273 static int lmv_init_ea_size(struct obd_export
*exp
, int easize
,
274 int def_easize
, int cookiesize
, int def_cookiesize
)
276 struct obd_device
*obd
= exp
->exp_obd
;
277 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
282 if (lmv
->max_easize
< easize
) {
283 lmv
->max_easize
= easize
;
286 if (lmv
->max_def_easize
< def_easize
) {
287 lmv
->max_def_easize
= def_easize
;
290 if (lmv
->max_cookiesize
< cookiesize
) {
291 lmv
->max_cookiesize
= cookiesize
;
294 if (lmv
->max_def_cookiesize
< def_cookiesize
) {
295 lmv
->max_def_cookiesize
= def_cookiesize
;
301 if (lmv
->connected
== 0)
304 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
305 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
||
306 lmv
->tgts
[i
]->ltd_active
== 0) {
307 CWARN("%s: NULL export for %d\n", obd
->obd_name
, i
);
311 rc
= md_init_ea_size(lmv
->tgts
[i
]->ltd_exp
, easize
, def_easize
,
312 cookiesize
, def_cookiesize
);
314 CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n",
315 obd
->obd_name
, i
, rc
);
322 #define MAX_STRING_SIZE 128
324 static int lmv_connect_mdc(struct obd_device
*obd
, struct lmv_tgt_desc
*tgt
)
326 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
327 struct obd_uuid
*cluuid
= &lmv
->cluuid
;
328 struct obd_uuid lmv_mdc_uuid
= { "LMV_MDC_UUID" };
329 struct obd_device
*mdc_obd
;
330 struct obd_export
*mdc_exp
;
331 struct lu_fld_target target
;
334 mdc_obd
= class_find_client_obd(&tgt
->ltd_uuid
, LUSTRE_MDC_NAME
,
337 CERROR("target %s not attached\n", tgt
->ltd_uuid
.uuid
);
341 CDEBUG(D_CONFIG
, "connect to %s(%s) - %s, %s FOR %s\n",
342 mdc_obd
->obd_name
, mdc_obd
->obd_uuid
.uuid
,
343 tgt
->ltd_uuid
.uuid
, obd
->obd_uuid
.uuid
, cluuid
->uuid
);
345 if (!mdc_obd
->obd_set_up
) {
346 CERROR("target %s is not set up\n", tgt
->ltd_uuid
.uuid
);
350 rc
= obd_connect(NULL
, &mdc_exp
, mdc_obd
, &lmv_mdc_uuid
,
351 &lmv
->conn_data
, NULL
);
353 CERROR("target %s connect error %d\n", tgt
->ltd_uuid
.uuid
, rc
);
358 * Init fid sequence client for this mdc and add new fld target.
360 rc
= obd_fid_init(mdc_obd
, mdc_exp
, LUSTRE_SEQ_METADATA
);
364 target
.ft_srv
= NULL
;
365 target
.ft_exp
= mdc_exp
;
366 target
.ft_idx
= tgt
->ltd_idx
;
368 fld_client_add_target(&lmv
->lmv_fld
, &target
);
370 rc
= obd_register_observer(mdc_obd
, obd
);
372 obd_disconnect(mdc_exp
);
373 CERROR("target %s register_observer error %d\n",
374 tgt
->ltd_uuid
.uuid
, rc
);
378 if (obd
->obd_observer
) {
380 * Tell the observer about the new target.
382 rc
= obd_notify(obd
->obd_observer
, mdc_exp
->exp_obd
,
384 (void *)(tgt
- lmv
->tgts
[0]));
386 obd_disconnect(mdc_exp
);
392 tgt
->ltd_exp
= mdc_exp
;
393 lmv
->desc
.ld_active_tgt_count
++;
395 md_init_ea_size(tgt
->ltd_exp
, lmv
->max_easize
, lmv
->max_def_easize
,
396 lmv
->max_cookiesize
, lmv
->max_def_cookiesize
);
398 CDEBUG(D_CONFIG
, "Connected to %s(%s) successfully (%d)\n",
399 mdc_obd
->obd_name
, mdc_obd
->obd_uuid
.uuid
,
400 atomic_read(&obd
->obd_refcount
));
402 if (lmv
->lmv_tgts_kobj
)
403 /* Even if we failed to create the link, that's fine */
404 rc
= sysfs_create_link(lmv
->lmv_tgts_kobj
, &mdc_obd
->obd_kobj
,
409 static void lmv_del_target(struct lmv_obd
*lmv
, int index
)
411 if (!lmv
->tgts
[index
])
414 kfree(lmv
->tgts
[index
]);
415 lmv
->tgts
[index
] = NULL
;
419 static int lmv_add_target(struct obd_device
*obd
, struct obd_uuid
*uuidp
,
420 __u32 index
, int gen
)
422 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
423 struct lmv_tgt_desc
*tgt
;
426 CDEBUG(D_CONFIG
, "Target uuid: %s. index %d\n", uuidp
->uuid
, index
);
430 if (lmv
->desc
.ld_tgt_count
== 0) {
431 struct obd_device
*mdc_obd
;
433 mdc_obd
= class_find_client_obd(uuidp
, LUSTRE_MDC_NAME
,
436 lmv_init_unlock(lmv
);
437 CERROR("%s: Target %s not attached: rc = %d\n",
438 obd
->obd_name
, uuidp
->uuid
, -EINVAL
);
443 if ((index
< lmv
->tgts_size
) && lmv
->tgts
[index
]) {
444 tgt
= lmv
->tgts
[index
];
445 CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n",
447 obd_uuid2str(&tgt
->ltd_uuid
), index
, -EEXIST
);
448 lmv_init_unlock(lmv
);
452 if (index
>= lmv
->tgts_size
) {
453 /* We need to reallocate the lmv target array. */
454 struct lmv_tgt_desc
**newtgts
, **old
= NULL
;
458 while (newsize
< index
+ 1)
460 newtgts
= kcalloc(newsize
, sizeof(*newtgts
), GFP_NOFS
);
462 lmv_init_unlock(lmv
);
466 if (lmv
->tgts_size
) {
467 memcpy(newtgts
, lmv
->tgts
,
468 sizeof(*newtgts
) * lmv
->tgts_size
);
470 oldsize
= lmv
->tgts_size
;
474 lmv
->tgts_size
= newsize
;
478 CDEBUG(D_CONFIG
, "tgts: %p size: %d\n", lmv
->tgts
,
482 tgt
= kzalloc(sizeof(*tgt
), GFP_NOFS
);
484 lmv_init_unlock(lmv
);
488 mutex_init(&tgt
->ltd_fid_mutex
);
489 tgt
->ltd_idx
= index
;
490 tgt
->ltd_uuid
= *uuidp
;
492 lmv
->tgts
[index
] = tgt
;
493 if (index
>= lmv
->desc
.ld_tgt_count
)
494 lmv
->desc
.ld_tgt_count
= index
+ 1;
496 if (lmv
->connected
) {
497 rc
= lmv_connect_mdc(obd
, tgt
);
499 spin_lock(&lmv
->lmv_lock
);
500 lmv
->desc
.ld_tgt_count
--;
501 memset(tgt
, 0, sizeof(*tgt
));
502 spin_unlock(&lmv
->lmv_lock
);
504 int easize
= sizeof(struct lmv_stripe_md
) +
505 lmv
->desc
.ld_tgt_count
* sizeof(struct lu_fid
);
506 lmv_init_ea_size(obd
->obd_self_export
, easize
, 0, 0, 0);
510 lmv_init_unlock(lmv
);
514 int lmv_check_connect(struct obd_device
*obd
)
516 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
517 struct lmv_tgt_desc
*tgt
;
526 if (lmv
->connected
) {
527 lmv_init_unlock(lmv
);
531 if (lmv
->desc
.ld_tgt_count
== 0) {
532 lmv_init_unlock(lmv
);
533 CERROR("%s: no targets configured.\n", obd
->obd_name
);
537 CDEBUG(D_CONFIG
, "Time to connect %s to %s\n",
538 lmv
->cluuid
.uuid
, obd
->obd_name
);
540 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
544 rc
= lmv_connect_mdc(obd
, tgt
);
549 lmv_set_timeouts(obd
);
550 class_export_put(lmv
->exp
);
552 easize
= lmv_get_easize(lmv
);
553 lmv_init_ea_size(obd
->obd_self_export
, easize
, 0, 0, 0);
554 lmv_init_unlock(lmv
);
566 --lmv
->desc
.ld_active_tgt_count
;
567 rc2
= obd_disconnect(tgt
->ltd_exp
);
569 CERROR("LMV target %s disconnect on MDC idx %d: error %d\n",
570 tgt
->ltd_uuid
.uuid
, i
, rc2
);
574 class_disconnect(lmv
->exp
);
575 lmv_init_unlock(lmv
);
579 static int lmv_disconnect_mdc(struct obd_device
*obd
, struct lmv_tgt_desc
*tgt
)
581 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
582 struct obd_device
*mdc_obd
;
585 mdc_obd
= class_exp2obd(tgt
->ltd_exp
);
588 mdc_obd
->obd_force
= obd
->obd_force
;
589 mdc_obd
->obd_fail
= obd
->obd_fail
;
590 mdc_obd
->obd_no_recov
= obd
->obd_no_recov
;
592 if (lmv
->lmv_tgts_kobj
)
593 sysfs_remove_link(lmv
->lmv_tgts_kobj
,
597 rc
= obd_fid_fini(tgt
->ltd_exp
->exp_obd
);
599 CERROR("Can't finalize fids factory\n");
601 CDEBUG(D_INFO
, "Disconnected from %s(%s) successfully\n",
602 tgt
->ltd_exp
->exp_obd
->obd_name
,
603 tgt
->ltd_exp
->exp_obd
->obd_uuid
.uuid
);
605 obd_register_observer(tgt
->ltd_exp
->exp_obd
, NULL
);
606 rc
= obd_disconnect(tgt
->ltd_exp
);
608 if (tgt
->ltd_active
) {
609 CERROR("Target %s disconnect error %d\n",
610 tgt
->ltd_uuid
.uuid
, rc
);
614 lmv_activate_target(lmv
, tgt
, 0);
619 static int lmv_disconnect(struct obd_export
*exp
)
621 struct obd_device
*obd
= class_exp2obd(exp
);
622 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
630 * Only disconnect the underlying layers on the final disconnect.
633 if (lmv
->refcount
!= 0)
636 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
637 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
640 lmv_disconnect_mdc(obd
, lmv
->tgts
[i
]);
643 if (lmv
->lmv_tgts_kobj
)
644 kobject_put(lmv
->lmv_tgts_kobj
);
648 * This is the case when no real connection is established by
649 * lmv_check_connect().
652 class_export_put(exp
);
653 rc
= class_disconnect(exp
);
654 if (lmv
->refcount
== 0)
659 static int lmv_fid2path(struct obd_export
*exp
, int len
, void *karg
,
662 struct obd_device
*obddev
= class_exp2obd(exp
);
663 struct lmv_obd
*lmv
= &obddev
->u
.lmv
;
664 struct getinfo_fid2path
*gf
;
665 struct lmv_tgt_desc
*tgt
;
666 struct getinfo_fid2path
*remote_gf
= NULL
;
667 int remote_gf_size
= 0;
670 gf
= (struct getinfo_fid2path
*)karg
;
671 tgt
= lmv_find_target(lmv
, &gf
->gf_fid
);
676 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, tgt
->ltd_exp
, len
, gf
, uarg
);
677 if (rc
!= 0 && rc
!= -EREMOTE
)
680 /* If remote_gf != NULL, it means just building the
681 * path on the remote MDT, copy this path segment to gf
684 struct getinfo_fid2path
*ori_gf
;
687 ori_gf
= (struct getinfo_fid2path
*)karg
;
688 if (strlen(ori_gf
->gf_path
) +
689 strlen(gf
->gf_path
) > ori_gf
->gf_pathlen
) {
694 ptr
= ori_gf
->gf_path
;
696 memmove(ptr
+ strlen(gf
->gf_path
) + 1, ptr
,
697 strlen(ori_gf
->gf_path
));
699 strncpy(ptr
, gf
->gf_path
, strlen(gf
->gf_path
));
700 ptr
+= strlen(gf
->gf_path
);
704 CDEBUG(D_INFO
, "%s: get path %s "DFID
" rec: %llu ln: %u\n",
705 tgt
->ltd_exp
->exp_obd
->obd_name
,
706 gf
->gf_path
, PFID(&gf
->gf_fid
), gf
->gf_recno
,
712 /* sigh, has to go to another MDT to do path building further */
714 remote_gf_size
= sizeof(*remote_gf
) + PATH_MAX
;
715 remote_gf
= kzalloc(remote_gf_size
, GFP_NOFS
);
720 remote_gf
->gf_pathlen
= PATH_MAX
;
723 if (!fid_is_sane(&gf
->gf_fid
)) {
724 CERROR("%s: invalid FID "DFID
": rc = %d\n",
725 tgt
->ltd_exp
->exp_obd
->obd_name
,
726 PFID(&gf
->gf_fid
), -EINVAL
);
731 tgt
= lmv_find_target(lmv
, &gf
->gf_fid
);
737 remote_gf
->gf_fid
= gf
->gf_fid
;
738 remote_gf
->gf_recno
= -1;
739 remote_gf
->gf_linkno
= -1;
740 memset(remote_gf
->gf_path
, 0, remote_gf
->gf_pathlen
);
742 goto repeat_fid2path
;
749 static int lmv_hsm_req_count(struct lmv_obd
*lmv
,
750 const struct hsm_user_request
*hur
,
751 const struct lmv_tgt_desc
*tgt_mds
)
754 struct lmv_tgt_desc
*curr_tgt
;
756 /* count how many requests must be sent to the given target */
757 for (i
= 0; i
< hur
->hur_request
.hr_itemcount
; i
++) {
758 curr_tgt
= lmv_find_target(lmv
, &hur
->hur_user_item
[i
].hui_fid
);
759 if (obd_uuid_equals(&curr_tgt
->ltd_uuid
, &tgt_mds
->ltd_uuid
))
765 static void lmv_hsm_req_build(struct lmv_obd
*lmv
,
766 struct hsm_user_request
*hur_in
,
767 const struct lmv_tgt_desc
*tgt_mds
,
768 struct hsm_user_request
*hur_out
)
771 struct lmv_tgt_desc
*curr_tgt
;
773 /* build the hsm_user_request for the given target */
774 hur_out
->hur_request
= hur_in
->hur_request
;
776 for (i
= 0; i
< hur_in
->hur_request
.hr_itemcount
; i
++) {
777 curr_tgt
= lmv_find_target(lmv
,
778 &hur_in
->hur_user_item
[i
].hui_fid
);
779 if (obd_uuid_equals(&curr_tgt
->ltd_uuid
, &tgt_mds
->ltd_uuid
)) {
780 hur_out
->hur_user_item
[nr_out
] =
781 hur_in
->hur_user_item
[i
];
785 hur_out
->hur_request
.hr_itemcount
= nr_out
;
786 memcpy(hur_data(hur_out
), hur_data(hur_in
),
787 hur_in
->hur_request
.hr_data_len
);
790 static int lmv_hsm_ct_unregister(struct lmv_obd
*lmv
, unsigned int cmd
, int len
,
791 struct lustre_kernelcomm
*lk
,
797 /* unregister request (call from llapi_hsm_copytool_fini) */
798 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
799 /* best effort: try to clean as much as possible
800 * (continue on error)
802 obd_iocontrol(cmd
, lmv
->tgts
[i
]->ltd_exp
, len
, lk
, uarg
);
805 /* Whatever the result, remove copytool from kuc groups.
806 * Unreached coordinators will get EPIPE on next requests
807 * and will unregister automatically.
809 rc
= libcfs_kkuc_group_rem(lk
->lk_uid
, lk
->lk_group
);
814 static int lmv_hsm_ct_register(struct lmv_obd
*lmv
, unsigned int cmd
, int len
,
815 struct lustre_kernelcomm
*lk
, void __user
*uarg
)
820 bool any_set
= false;
821 struct kkuc_ct_data kcd
= { 0 };
823 /* All or nothing: try to register to all MDS.
824 * In case of failure, unregister from previous MDS,
825 * except if it because of inactive target.
827 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
828 err
= obd_iocontrol(cmd
, lmv
->tgts
[i
]->ltd_exp
, len
, lk
, uarg
);
830 if (lmv
->tgts
[i
]->ltd_active
) {
831 /* permanent error */
832 CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
833 lmv
->tgts
[i
]->ltd_uuid
.uuid
,
836 lk
->lk_flags
|= LK_FLG_STOP
;
837 /* unregister from previous MDS */
838 for (j
= 0; j
< i
; j
++)
840 lmv
->tgts
[j
]->ltd_exp
,
844 /* else: transient error.
845 * kuc will register to the missing MDT when it is back
853 /* no registration done: return error */
856 /* at least one registration done, with no failure */
857 filp
= fget(lk
->lk_wfd
);
861 kcd
.kcd_magic
= KKUC_CT_DATA_MAGIC
;
862 kcd
.kcd_uuid
= lmv
->cluuid
;
863 kcd
.kcd_archive
= lk
->lk_data
;
865 rc
= libcfs_kkuc_group_add(filp
, lk
->lk_uid
, lk
->lk_group
,
875 static int lmv_iocontrol(unsigned int cmd
, struct obd_export
*exp
,
876 int len
, void *karg
, void __user
*uarg
)
878 struct obd_device
*obddev
= class_exp2obd(exp
);
879 struct lmv_obd
*lmv
= &obddev
->u
.lmv
;
883 int count
= lmv
->desc
.ld_tgt_count
;
889 case IOC_OBD_STATFS
: {
890 struct obd_ioctl_data
*data
= karg
;
891 struct obd_device
*mdc_obd
;
892 struct obd_statfs stat_buf
= {0};
895 memcpy(&index
, data
->ioc_inlbuf2
, sizeof(__u32
));
899 if (!lmv
->tgts
[index
] || lmv
->tgts
[index
]->ltd_active
== 0)
902 mdc_obd
= class_exp2obd(lmv
->tgts
[index
]->ltd_exp
);
907 if (copy_to_user(data
->ioc_pbuf2
, obd2cli_tgt(mdc_obd
),
908 min((int)data
->ioc_plen2
,
909 (int)sizeof(struct obd_uuid
))))
912 rc
= obd_statfs(NULL
, lmv
->tgts
[index
]->ltd_exp
, &stat_buf
,
913 cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS
),
917 if (copy_to_user(data
->ioc_pbuf1
, &stat_buf
,
918 min((int)data
->ioc_plen1
,
919 (int)sizeof(stat_buf
))))
923 case OBD_IOC_QUOTACTL
: {
924 struct if_quotactl
*qctl
= karg
;
925 struct lmv_tgt_desc
*tgt
= NULL
;
926 struct obd_quotactl
*oqctl
;
928 if (qctl
->qc_valid
== QC_MDTIDX
) {
929 if (qctl
->qc_idx
< 0 || count
<= qctl
->qc_idx
)
932 tgt
= lmv
->tgts
[qctl
->qc_idx
];
933 if (!tgt
|| !tgt
->ltd_exp
)
935 } else if (qctl
->qc_valid
== QC_UUID
) {
936 for (i
= 0; i
< count
; i
++) {
940 if (!obd_uuid_equals(&tgt
->ltd_uuid
,
956 LASSERT(tgt
&& tgt
->ltd_exp
);
957 oqctl
= kzalloc(sizeof(*oqctl
), GFP_NOFS
);
961 QCTL_COPY(oqctl
, qctl
);
962 rc
= obd_quotactl(tgt
->ltd_exp
, oqctl
);
964 QCTL_COPY(qctl
, oqctl
);
965 qctl
->qc_valid
= QC_MDTIDX
;
966 qctl
->obd_uuid
= tgt
->ltd_uuid
;
971 case OBD_IOC_CHANGELOG_SEND
:
972 case OBD_IOC_CHANGELOG_CLEAR
: {
973 struct ioc_changelog
*icc
= karg
;
975 if (icc
->icc_mdtindex
>= count
)
978 if (!lmv
->tgts
[icc
->icc_mdtindex
] ||
979 !lmv
->tgts
[icc
->icc_mdtindex
]->ltd_exp
||
980 lmv
->tgts
[icc
->icc_mdtindex
]->ltd_active
== 0)
982 rc
= obd_iocontrol(cmd
, lmv
->tgts
[icc
->icc_mdtindex
]->ltd_exp
,
983 sizeof(*icc
), icc
, NULL
);
986 case LL_IOC_GET_CONNECT_FLAGS
: {
989 rc
= obd_iocontrol(cmd
, lmv
->tgts
[0]->ltd_exp
, len
, karg
, uarg
);
992 case OBD_IOC_FID2PATH
: {
993 rc
= lmv_fid2path(exp
, len
, karg
, uarg
);
996 case LL_IOC_HSM_STATE_GET
:
997 case LL_IOC_HSM_STATE_SET
:
998 case LL_IOC_HSM_ACTION
: {
999 struct md_op_data
*op_data
= karg
;
1000 struct lmv_tgt_desc
*tgt
;
1002 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1004 return PTR_ERR(tgt
);
1009 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
1012 case LL_IOC_HSM_PROGRESS
: {
1013 const struct hsm_progress_kernel
*hpk
= karg
;
1014 struct lmv_tgt_desc
*tgt
;
1016 tgt
= lmv_find_target(lmv
, &hpk
->hpk_fid
);
1018 return PTR_ERR(tgt
);
1019 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
1022 case LL_IOC_HSM_REQUEST
: {
1023 struct hsm_user_request
*hur
= karg
;
1024 struct lmv_tgt_desc
*tgt
;
1025 unsigned int reqcount
= hur
->hur_request
.hr_itemcount
;
1030 /* if the request is about a single fid
1031 * or if there is a single MDS, no need to split
1034 if (reqcount
== 1 || count
== 1) {
1035 tgt
= lmv_find_target(lmv
,
1036 &hur
->hur_user_item
[0].hui_fid
);
1038 return PTR_ERR(tgt
);
1039 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
1041 /* split fid list to their respective MDS */
1042 for (i
= 0; i
< count
; i
++) {
1043 unsigned int nr
, reqlen
;
1045 struct hsm_user_request
*req
;
1047 nr
= lmv_hsm_req_count(lmv
, hur
, lmv
->tgts
[i
]);
1048 if (nr
== 0) /* nothing for this MDS */
1051 /* build a request with fids for this MDS */
1052 reqlen
= offsetof(typeof(*hur
),
1054 + hur
->hur_request
.hr_data_len
;
1055 req
= libcfs_kvzalloc(reqlen
, GFP_NOFS
);
1059 lmv_hsm_req_build(lmv
, hur
, lmv
->tgts
[i
], req
);
1061 rc1
= obd_iocontrol(cmd
, lmv
->tgts
[i
]->ltd_exp
,
1063 if (rc1
!= 0 && rc
== 0)
1070 case LL_IOC_LOV_SWAP_LAYOUTS
: {
1071 struct md_op_data
*op_data
= karg
;
1072 struct lmv_tgt_desc
*tgt1
, *tgt2
;
1074 tgt1
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1076 return PTR_ERR(tgt1
);
1078 tgt2
= lmv_find_target(lmv
, &op_data
->op_fid2
);
1080 return PTR_ERR(tgt2
);
1082 if (!tgt1
->ltd_exp
|| !tgt2
->ltd_exp
)
1085 /* only files on same MDT can have their layouts swapped */
1086 if (tgt1
->ltd_idx
!= tgt2
->ltd_idx
)
1089 rc
= obd_iocontrol(cmd
, tgt1
->ltd_exp
, len
, karg
, uarg
);
1092 case LL_IOC_HSM_CT_START
: {
1093 struct lustre_kernelcomm
*lk
= karg
;
1095 if (lk
->lk_flags
& LK_FLG_STOP
)
1096 rc
= lmv_hsm_ct_unregister(lmv
, cmd
, len
, lk
, uarg
);
1098 rc
= lmv_hsm_ct_register(lmv
, cmd
, len
, lk
, uarg
);
1102 for (i
= 0; i
< count
; i
++) {
1103 struct obd_device
*mdc_obd
;
1106 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
1108 /* ll_umount_begin() sets force flag but for lmv, not
1109 * mdc. Let's pass it through
1111 mdc_obd
= class_exp2obd(lmv
->tgts
[i
]->ltd_exp
);
1112 mdc_obd
->obd_force
= obddev
->obd_force
;
1113 err
= obd_iocontrol(cmd
, lmv
->tgts
[i
]->ltd_exp
, len
,
1115 if (err
== -ENODATA
&& cmd
== OBD_IOC_POLL_QUOTACHECK
) {
1118 if (lmv
->tgts
[i
]->ltd_active
) {
1119 CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
1120 lmv
->tgts
[i
]->ltd_uuid
.uuid
,
1135 * This is _inode_ placement policy function (not name).
1137 static int lmv_placement_policy(struct obd_device
*obd
,
1138 struct md_op_data
*op_data
, u32
*mds
)
1140 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1144 if (lmv
->desc
.ld_tgt_count
== 1) {
1150 * If stripe_offset is provided during setdirstripe
1151 * (setdirstripe -i xx), xx MDS will be chosen.
1153 if (op_data
->op_cli_flags
& CLI_SET_MEA
) {
1154 struct lmv_user_md
*lum
;
1156 lum
= (struct lmv_user_md
*)op_data
->op_data
;
1157 if (lum
->lum_type
== LMV_STRIPE_TYPE
&&
1158 lum
->lum_stripe_offset
!= -1) {
1159 if (lum
->lum_stripe_offset
>= lmv
->desc
.ld_tgt_count
) {
1160 CERROR("%s: Stripe_offset %d > MDT count %d: rc = %d\n",
1162 lum
->lum_stripe_offset
,
1163 lmv
->desc
.ld_tgt_count
, -ERANGE
);
1166 *mds
= lum
->lum_stripe_offset
;
1171 /* Allocate new fid on target according to operation type and parent
1174 *mds
= op_data
->op_mds
;
1178 int __lmv_fid_alloc(struct lmv_obd
*lmv
, struct lu_fid
*fid
, u32 mds
)
1180 struct lmv_tgt_desc
*tgt
;
1183 tgt
= lmv_get_target(lmv
, mds
);
1185 return PTR_ERR(tgt
);
1188 * New seq alloc and FLD setup should be atomic. Otherwise we may find
1189 * on server that seq in new allocated fid is not yet known.
1191 mutex_lock(&tgt
->ltd_fid_mutex
);
1193 if (tgt
->ltd_active
== 0 || !tgt
->ltd_exp
) {
1199 * Asking underlaying tgt layer to allocate new fid.
1201 rc
= obd_fid_alloc(tgt
->ltd_exp
, fid
, NULL
);
1203 LASSERT(fid_is_sane(fid
));
1208 mutex_unlock(&tgt
->ltd_fid_mutex
);
1212 int lmv_fid_alloc(struct obd_export
*exp
, struct lu_fid
*fid
,
1213 struct md_op_data
*op_data
)
1215 struct obd_device
*obd
= class_exp2obd(exp
);
1216 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1223 rc
= lmv_placement_policy(obd
, op_data
, &mds
);
1225 CERROR("Can't get target for allocating fid, rc %d\n",
1230 rc
= __lmv_fid_alloc(lmv
, fid
, mds
);
1232 CERROR("Can't alloc new fid, rc %d\n", rc
);
1239 static int lmv_setup(struct obd_device
*obd
, struct lustre_cfg
*lcfg
)
1241 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1242 struct lprocfs_static_vars lvars
= { NULL
};
1243 struct lmv_desc
*desc
;
1246 if (LUSTRE_CFG_BUFLEN(lcfg
, 1) < 1) {
1247 CERROR("LMV setup requires a descriptor\n");
1251 desc
= (struct lmv_desc
*)lustre_cfg_buf(lcfg
, 1);
1252 if (sizeof(*desc
) > LUSTRE_CFG_BUFLEN(lcfg
, 1)) {
1253 CERROR("Lmv descriptor size wrong: %d > %d\n",
1254 (int)sizeof(*desc
), LUSTRE_CFG_BUFLEN(lcfg
, 1));
1258 lmv
->tgts
= kcalloc(32, sizeof(*lmv
->tgts
), GFP_NOFS
);
1261 lmv
->tgts_size
= 32;
1263 obd_str2uuid(&lmv
->desc
.ld_uuid
, desc
->ld_uuid
.uuid
);
1264 lmv
->desc
.ld_tgt_count
= 0;
1265 lmv
->desc
.ld_active_tgt_count
= 0;
1266 lmv
->max_cookiesize
= 0;
1267 lmv
->max_def_easize
= 0;
1268 lmv
->max_easize
= 0;
1269 lmv
->lmv_placement
= PLACEMENT_CHAR_POLICY
;
1271 spin_lock_init(&lmv
->lmv_lock
);
1272 mutex_init(&lmv
->init_mutex
);
1274 lprocfs_lmv_init_vars(&lvars
);
1276 lprocfs_obd_setup(obd
, lvars
.obd_vars
, lvars
.sysfs_vars
);
1277 rc
= ldebugfs_seq_create(obd
->obd_debugfs_entry
, "target_obd",
1278 0444, &lmv_proc_target_fops
, obd
);
1280 CWARN("%s: error adding LMV target_obd file: rc = %d\n",
1282 rc
= fld_client_init(&lmv
->lmv_fld
, obd
->obd_name
,
1283 LUSTRE_CLI_FLD_HASH_DHT
);
1285 CERROR("Can't init FLD, err %d\n", rc
);
1295 static int lmv_cleanup(struct obd_device
*obd
)
1297 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1299 fld_client_fini(&lmv
->lmv_fld
);
1303 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
1306 lmv_del_target(lmv
, i
);
1314 static int lmv_process_config(struct obd_device
*obd
, u32 len
, void *buf
)
1316 struct lustre_cfg
*lcfg
= buf
;
1317 struct obd_uuid obd_uuid
;
1322 switch (lcfg
->lcfg_command
) {
1324 /* modify_mdc_tgts add 0:lustre-clilmv 1:lustre-MDT0000_UUID
1325 * 2:0 3:1 4:lustre-MDT0000-mdc_UUID
1327 if (LUSTRE_CFG_BUFLEN(lcfg
, 1) > sizeof(obd_uuid
.uuid
)) {
1332 obd_str2uuid(&obd_uuid
, lustre_cfg_buf(lcfg
, 1));
1334 if (sscanf(lustre_cfg_buf(lcfg
, 2), "%d", &index
) != 1) {
1338 if (sscanf(lustre_cfg_buf(lcfg
, 3), "%d", &gen
) != 1) {
1342 rc
= lmv_add_target(obd
, &obd_uuid
, index
, gen
);
1345 CERROR("Unknown command: %d\n", lcfg
->lcfg_command
);
1353 static int lmv_statfs(const struct lu_env
*env
, struct obd_export
*exp
,
1354 struct obd_statfs
*osfs
, __u64 max_age
, __u32 flags
)
1356 struct obd_device
*obd
= class_exp2obd(exp
);
1357 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1358 struct obd_statfs
*temp
;
1362 rc
= lmv_check_connect(obd
);
1366 temp
= kzalloc(sizeof(*temp
), GFP_NOFS
);
1370 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
1371 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
1374 rc
= obd_statfs(env
, lmv
->tgts
[i
]->ltd_exp
, temp
,
1377 CERROR("can't stat MDS #%d (%s), error %d\n", i
,
1378 lmv
->tgts
[i
]->ltd_exp
->exp_obd
->obd_name
,
1385 /* If the statfs is from mount, it will needs
1386 * retrieve necessary information from MDT0.
1387 * i.e. mount does not need the merged osfs
1389 * And also clients can be mounted as long as
1390 * MDT0 is in service
1392 if (flags
& OBD_STATFS_FOR_MDT0
)
1395 osfs
->os_bavail
+= temp
->os_bavail
;
1396 osfs
->os_blocks
+= temp
->os_blocks
;
1397 osfs
->os_ffree
+= temp
->os_ffree
;
1398 osfs
->os_files
+= temp
->os_files
;
1407 static int lmv_getstatus(struct obd_export
*exp
,
1410 struct obd_device
*obd
= exp
->exp_obd
;
1411 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1414 rc
= lmv_check_connect(obd
);
1418 rc
= md_getstatus(lmv
->tgts
[0]->ltd_exp
, fid
);
1422 static int lmv_getxattr(struct obd_export
*exp
, const struct lu_fid
*fid
,
1423 u64 valid
, const char *name
,
1424 const char *input
, int input_size
, int output_size
,
1425 int flags
, struct ptlrpc_request
**request
)
1427 struct obd_device
*obd
= exp
->exp_obd
;
1428 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1429 struct lmv_tgt_desc
*tgt
;
1432 rc
= lmv_check_connect(obd
);
1436 tgt
= lmv_find_target(lmv
, fid
);
1438 return PTR_ERR(tgt
);
1440 rc
= md_getxattr(tgt
->ltd_exp
, fid
, valid
, name
, input
,
1441 input_size
, output_size
, flags
, request
);
1446 static int lmv_setxattr(struct obd_export
*exp
, const struct lu_fid
*fid
,
1447 u64 valid
, const char *name
,
1448 const char *input
, int input_size
, int output_size
,
1449 int flags
, __u32 suppgid
,
1450 struct ptlrpc_request
**request
)
1452 struct obd_device
*obd
= exp
->exp_obd
;
1453 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1454 struct lmv_tgt_desc
*tgt
;
1457 rc
= lmv_check_connect(obd
);
1461 tgt
= lmv_find_target(lmv
, fid
);
1463 return PTR_ERR(tgt
);
1465 rc
= md_setxattr(tgt
->ltd_exp
, fid
, valid
, name
, input
,
1466 input_size
, output_size
, flags
, suppgid
,
1472 static int lmv_getattr(struct obd_export
*exp
, struct md_op_data
*op_data
,
1473 struct ptlrpc_request
**request
)
1475 struct obd_device
*obd
= exp
->exp_obd
;
1476 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1477 struct lmv_tgt_desc
*tgt
;
1480 rc
= lmv_check_connect(obd
);
1484 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1486 return PTR_ERR(tgt
);
1488 if (op_data
->op_flags
& MF_GET_MDT_IDX
) {
1489 op_data
->op_mds
= tgt
->ltd_idx
;
1493 rc
= md_getattr(tgt
->ltd_exp
, op_data
, request
);
1498 static int lmv_null_inode(struct obd_export
*exp
, const struct lu_fid
*fid
)
1500 struct obd_device
*obd
= exp
->exp_obd
;
1501 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1505 rc
= lmv_check_connect(obd
);
1509 CDEBUG(D_INODE
, "CBDATA for "DFID
"\n", PFID(fid
));
1512 * With DNE every object can have two locks in different namespaces:
1513 * lookup lock in space of MDT storing direntry and update/open lock in
1514 * space of MDT storing inode.
1516 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
1517 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
1519 md_null_inode(lmv
->tgts
[i
]->ltd_exp
, fid
);
1525 static int lmv_find_cbdata(struct obd_export
*exp
, const struct lu_fid
*fid
,
1526 ldlm_iterator_t it
, void *data
)
1528 struct obd_device
*obd
= exp
->exp_obd
;
1529 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1533 rc
= lmv_check_connect(obd
);
1537 CDEBUG(D_INODE
, "CBDATA for "DFID
"\n", PFID(fid
));
1540 * With DNE every object can have two locks in different namespaces:
1541 * lookup lock in space of MDT storing direntry and update/open lock in
1542 * space of MDT storing inode.
1544 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
1545 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
1547 rc
= md_find_cbdata(lmv
->tgts
[i
]->ltd_exp
, fid
, it
, data
);
1555 static int lmv_close(struct obd_export
*exp
, struct md_op_data
*op_data
,
1556 struct md_open_data
*mod
, struct ptlrpc_request
**request
)
1558 struct obd_device
*obd
= exp
->exp_obd
;
1559 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1560 struct lmv_tgt_desc
*tgt
;
1563 rc
= lmv_check_connect(obd
);
1567 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1569 return PTR_ERR(tgt
);
1571 CDEBUG(D_INODE
, "CLOSE "DFID
"\n", PFID(&op_data
->op_fid1
));
1572 rc
= md_close(tgt
->ltd_exp
, op_data
, mod
, request
);
1577 *lmv_locate_mds(struct lmv_obd
*lmv
, struct md_op_data
*op_data
,
1580 struct lmv_tgt_desc
*tgt
;
1582 tgt
= lmv_find_target(lmv
, fid
);
1586 op_data
->op_mds
= tgt
->ltd_idx
;
1591 static int lmv_create(struct obd_export
*exp
, struct md_op_data
*op_data
,
1592 const void *data
, int datalen
, int mode
, __u32 uid
,
1593 __u32 gid
, cfs_cap_t cap_effective
, __u64 rdev
,
1594 struct ptlrpc_request
**request
)
1596 struct obd_device
*obd
= exp
->exp_obd
;
1597 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1598 struct lmv_tgt_desc
*tgt
;
1601 rc
= lmv_check_connect(obd
);
1605 if (!lmv
->desc
.ld_active_tgt_count
)
1608 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
1610 return PTR_ERR(tgt
);
1612 rc
= lmv_fid_alloc(exp
, &op_data
->op_fid2
, op_data
);
1616 CDEBUG(D_INODE
, "CREATE '%*s' on "DFID
" -> mds #%x\n",
1617 op_data
->op_namelen
, op_data
->op_name
, PFID(&op_data
->op_fid1
),
1620 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
;
1621 rc
= md_create(tgt
->ltd_exp
, op_data
, data
, datalen
, mode
, uid
, gid
,
1622 cap_effective
, rdev
, request
);
1627 CDEBUG(D_INODE
, "Created - "DFID
"\n", PFID(&op_data
->op_fid2
));
1632 static int lmv_done_writing(struct obd_export
*exp
,
1633 struct md_op_data
*op_data
,
1634 struct md_open_data
*mod
)
1636 struct obd_device
*obd
= exp
->exp_obd
;
1637 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1638 struct lmv_tgt_desc
*tgt
;
1641 rc
= lmv_check_connect(obd
);
1645 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1647 return PTR_ERR(tgt
);
1649 rc
= md_done_writing(tgt
->ltd_exp
, op_data
, mod
);
1654 lmv_enqueue_remote(struct obd_export
*exp
, struct ldlm_enqueue_info
*einfo
,
1655 struct lookup_intent
*it
, struct md_op_data
*op_data
,
1656 struct lustre_handle
*lockh
, void *lmm
, int lmmsize
,
1657 __u64 extra_lock_flags
)
1659 struct ptlrpc_request
*req
= it
->d
.lustre
.it_data
;
1660 struct obd_device
*obd
= exp
->exp_obd
;
1661 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1662 struct lustre_handle plock
;
1663 struct lmv_tgt_desc
*tgt
;
1664 struct md_op_data
*rdata
;
1666 struct mdt_body
*body
;
1670 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1672 if (!(body
->valid
& OBD_MD_MDS
))
1675 CDEBUG(D_INODE
, "REMOTE_ENQUEUE '%s' on "DFID
" -> "DFID
"\n",
1676 LL_IT2STR(it
), PFID(&op_data
->op_fid1
), PFID(&body
->fid1
));
1679 * We got LOOKUP lock, but we really need attrs.
1681 pmode
= it
->d
.lustre
.it_lock_mode
;
1682 LASSERT(pmode
!= 0);
1683 memcpy(&plock
, lockh
, sizeof(plock
));
1684 it
->d
.lustre
.it_lock_mode
= 0;
1685 it
->d
.lustre
.it_data
= NULL
;
1688 ptlrpc_req_finished(req
);
1690 tgt
= lmv_find_target(lmv
, &fid1
);
1696 rdata
= kzalloc(sizeof(*rdata
), GFP_NOFS
);
1702 rdata
->op_fid1
= fid1
;
1703 rdata
->op_bias
= MDS_CROSS_REF
;
1705 rc
= md_enqueue(tgt
->ltd_exp
, einfo
, it
, rdata
, lockh
,
1706 lmm
, lmmsize
, NULL
, extra_lock_flags
);
1709 ldlm_lock_decref(&plock
, pmode
);
1714 lmv_enqueue(struct obd_export
*exp
, struct ldlm_enqueue_info
*einfo
,
1715 struct lookup_intent
*it
, struct md_op_data
*op_data
,
1716 struct lustre_handle
*lockh
, void *lmm
, int lmmsize
,
1717 struct ptlrpc_request
**req
, __u64 extra_lock_flags
)
1719 struct obd_device
*obd
= exp
->exp_obd
;
1720 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1721 struct lmv_tgt_desc
*tgt
;
1724 rc
= lmv_check_connect(obd
);
1728 CDEBUG(D_INODE
, "ENQUEUE '%s' on "DFID
"\n",
1729 LL_IT2STR(it
), PFID(&op_data
->op_fid1
));
1731 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
1733 return PTR_ERR(tgt
);
1735 CDEBUG(D_INODE
, "ENQUEUE '%s' on "DFID
" -> mds #%d\n",
1736 LL_IT2STR(it
), PFID(&op_data
->op_fid1
), tgt
->ltd_idx
);
1738 rc
= md_enqueue(tgt
->ltd_exp
, einfo
, it
, op_data
, lockh
,
1739 lmm
, lmmsize
, req
, extra_lock_flags
);
1741 if (rc
== 0 && it
&& it
->it_op
== IT_OPEN
) {
1742 rc
= lmv_enqueue_remote(exp
, einfo
, it
, op_data
, lockh
,
1743 lmm
, lmmsize
, extra_lock_flags
);
1749 lmv_getattr_name(struct obd_export
*exp
, struct md_op_data
*op_data
,
1750 struct ptlrpc_request
**request
)
1752 struct ptlrpc_request
*req
= NULL
;
1753 struct obd_device
*obd
= exp
->exp_obd
;
1754 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1755 struct lmv_tgt_desc
*tgt
;
1756 struct mdt_body
*body
;
1759 rc
= lmv_check_connect(obd
);
1763 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
1765 return PTR_ERR(tgt
);
1767 CDEBUG(D_INODE
, "GETATTR_NAME for %*s on "DFID
" -> mds #%d\n",
1768 op_data
->op_namelen
, op_data
->op_name
, PFID(&op_data
->op_fid1
),
1771 rc
= md_getattr_name(tgt
->ltd_exp
, op_data
, request
);
1775 body
= req_capsule_server_get(&(*request
)->rq_pill
,
1778 if (body
->valid
& OBD_MD_MDS
) {
1779 struct lu_fid rid
= body
->fid1
;
1781 CDEBUG(D_INODE
, "Request attrs for "DFID
"\n",
1784 tgt
= lmv_find_target(lmv
, &rid
);
1786 ptlrpc_req_finished(*request
);
1787 return PTR_ERR(tgt
);
1790 op_data
->op_fid1
= rid
;
1791 op_data
->op_valid
|= OBD_MD_FLCROSSREF
;
1792 op_data
->op_namelen
= 0;
1793 op_data
->op_name
= NULL
;
1794 rc
= md_getattr_name(tgt
->ltd_exp
, op_data
, &req
);
1795 ptlrpc_req_finished(*request
);
/* Map an MF_MDC_CANCEL_FID* flag to the corresponding FID in op_data. */
#define md_op_data_fid(op_data, fl)			\
	(fl == MF_MDC_CANCEL_FID1 ? &op_data->op_fid1 :	\
	 fl == MF_MDC_CANCEL_FID2 ? &op_data->op_fid2 :	\
	 fl == MF_MDC_CANCEL_FID3 ? &op_data->op_fid3 :	\
	 fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 :	\
	 NULL)
1809 static int lmv_early_cancel(struct obd_export
*exp
, struct md_op_data
*op_data
,
1810 int op_tgt
, enum ldlm_mode mode
, int bits
,
1813 struct lu_fid
*fid
= md_op_data_fid(op_data
, flag
);
1814 struct obd_device
*obd
= exp
->exp_obd
;
1815 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1816 struct lmv_tgt_desc
*tgt
;
1817 ldlm_policy_data_t policy
= { {0} };
1820 if (!fid_is_sane(fid
))
1823 tgt
= lmv_find_target(lmv
, fid
);
1825 return PTR_ERR(tgt
);
1827 if (tgt
->ltd_idx
!= op_tgt
) {
1828 CDEBUG(D_INODE
, "EARLY_CANCEL on "DFID
"\n", PFID(fid
));
1829 policy
.l_inodebits
.bits
= bits
;
1830 rc
= md_cancel_unused(tgt
->ltd_exp
, fid
, &policy
,
1831 mode
, LCF_ASYNC
, NULL
);
1834 "EARLY_CANCEL skip operation target %d on "DFID
"\n",
1836 op_data
->op_flags
|= flag
;
1844 * llite passes fid of an target inode in op_data->op_fid1 and id of directory in
1847 static int lmv_link(struct obd_export
*exp
, struct md_op_data
*op_data
,
1848 struct ptlrpc_request
**request
)
1850 struct obd_device
*obd
= exp
->exp_obd
;
1851 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1852 struct lmv_tgt_desc
*tgt
;
1855 rc
= lmv_check_connect(obd
);
1859 LASSERT(op_data
->op_namelen
!= 0);
1861 CDEBUG(D_INODE
, "LINK "DFID
":%*s to "DFID
"\n",
1862 PFID(&op_data
->op_fid2
), op_data
->op_namelen
,
1863 op_data
->op_name
, PFID(&op_data
->op_fid1
));
1865 op_data
->op_fsuid
= from_kuid(&init_user_ns
, current_fsuid());
1866 op_data
->op_fsgid
= from_kgid(&init_user_ns
, current_fsgid());
1867 op_data
->op_cap
= cfs_curproc_cap_pack();
1868 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid2
);
1870 return PTR_ERR(tgt
);
1873 * Cancel UPDATE lock on child (fid1).
1875 op_data
->op_flags
|= MF_MDC_CANCEL_FID2
;
1876 rc
= lmv_early_cancel(exp
, op_data
, tgt
->ltd_idx
, LCK_EX
,
1877 MDS_INODELOCK_UPDATE
, MF_MDC_CANCEL_FID1
);
1881 rc
= md_link(tgt
->ltd_exp
, op_data
, request
);
1886 static int lmv_rename(struct obd_export
*exp
, struct md_op_data
*op_data
,
1887 const char *old
, int oldlen
, const char *new, int newlen
,
1888 struct ptlrpc_request
**request
)
1890 struct obd_device
*obd
= exp
->exp_obd
;
1891 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1892 struct lmv_tgt_desc
*src_tgt
;
1893 struct lmv_tgt_desc
*tgt_tgt
;
1896 LASSERT(oldlen
!= 0);
1898 CDEBUG(D_INODE
, "RENAME %*s in "DFID
" to %*s in "DFID
"\n",
1899 oldlen
, old
, PFID(&op_data
->op_fid1
),
1900 newlen
, new, PFID(&op_data
->op_fid2
));
1902 rc
= lmv_check_connect(obd
);
1906 op_data
->op_fsuid
= from_kuid(&init_user_ns
, current_fsuid());
1907 op_data
->op_fsgid
= from_kgid(&init_user_ns
, current_fsgid());
1908 op_data
->op_cap
= cfs_curproc_cap_pack();
1909 src_tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
1910 if (IS_ERR(src_tgt
))
1911 return PTR_ERR(src_tgt
);
1913 tgt_tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid2
);
1914 if (IS_ERR(tgt_tgt
))
1915 return PTR_ERR(tgt_tgt
);
1917 * LOOKUP lock on src child (fid3) should also be cancelled for
1918 * src_tgt in mdc_rename.
1920 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
| MF_MDC_CANCEL_FID3
;
1923 * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its
1926 rc
= lmv_early_cancel(exp
, op_data
, src_tgt
->ltd_idx
,
1927 LCK_EX
, MDS_INODELOCK_UPDATE
,
1928 MF_MDC_CANCEL_FID2
);
1931 * Cancel LOOKUP locks on tgt child (fid4) for parent tgt_tgt.
1934 rc
= lmv_early_cancel(exp
, op_data
, src_tgt
->ltd_idx
,
1935 LCK_EX
, MDS_INODELOCK_LOOKUP
,
1936 MF_MDC_CANCEL_FID4
);
1940 * Cancel all the locks on tgt child (fid4).
1943 rc
= lmv_early_cancel(exp
, op_data
, src_tgt
->ltd_idx
,
1944 LCK_EX
, MDS_INODELOCK_FULL
,
1945 MF_MDC_CANCEL_FID4
);
1948 rc
= md_rename(src_tgt
->ltd_exp
, op_data
, old
, oldlen
,
1949 new, newlen
, request
);
1953 static int lmv_setattr(struct obd_export
*exp
, struct md_op_data
*op_data
,
1954 void *ea
, int ealen
, void *ea2
, int ea2len
,
1955 struct ptlrpc_request
**request
,
1956 struct md_open_data
**mod
)
1958 struct obd_device
*obd
= exp
->exp_obd
;
1959 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1960 struct lmv_tgt_desc
*tgt
;
1963 rc
= lmv_check_connect(obd
);
1967 CDEBUG(D_INODE
, "SETATTR for "DFID
", valid 0x%x\n",
1968 PFID(&op_data
->op_fid1
), op_data
->op_attr
.ia_valid
);
1970 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
;
1971 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1973 return PTR_ERR(tgt
);
1975 rc
= md_setattr(tgt
->ltd_exp
, op_data
, ea
, ealen
, ea2
,
1976 ea2len
, request
, mod
);
1981 static int lmv_sync(struct obd_export
*exp
, const struct lu_fid
*fid
,
1982 struct ptlrpc_request
**request
)
1984 struct obd_device
*obd
= exp
->exp_obd
;
1985 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1986 struct lmv_tgt_desc
*tgt
;
1989 rc
= lmv_check_connect(obd
);
1993 tgt
= lmv_find_target(lmv
, fid
);
1995 return PTR_ERR(tgt
);
1997 rc
= md_sync(tgt
->ltd_exp
, fid
, request
);
/*
 * Adjust a set of pages, each page containing an array of lu_dirpages,
 * so that each page can be used as a single logical lu_dirpage.
 *
 * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
 * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
 * struct lu_dirent.  It has size up to LU_PAGE_SIZE. The ldp_hash_end
 * value is used as a cookie to request the next lu_dirpage in a
 * directory listing that spans multiple pages (two in this example):
 *
 *   .|--------v-------   -----.
 *   |s|e|f|p|ent|ent| ... |ent|
 *   '--|--------------   -----'   Each CFS_PAGE contains a single
 *      '------.                   lu_dirpage.
 * .---------v-------   -----.
 * |s|e|f|p|ent| 0 | ... | 0 |
 * '-----------------   -----'
 *
 * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is
 * larger than LU_PAGE_SIZE, a single host page may contain multiple
 * lu_dirpages. After reading the lu_dirpages from the MDS, the
 * ldp_hash_end of the first lu_dirpage refers to the one immediately
 * after it in the same CFS_PAGE (arrows simplified for brevity, but
 * in general e0==s1, e1==s2, etc.):
 *
 * .--------------------   -----.
 * |s0|e0|f0|p|ent|ent| ... |ent|
 * |---v----------------   -----|
 * |s1|e1|f1|p|ent|ent| ... |ent|
 * |---v----------------   -----|   Here, each CFS_PAGE contains
 *             ...                  multiple lu_dirpages.
 * |---v----------------   -----|
 * |s'|e'|f'|p|ent|ent| ... |ent|
 * '---|----------------   -----'
 *
 * .----------------------------.
 *
 * This structure is transformed into a single logical lu_dirpage as follows:
 *
 * - Replace e0 with e' so the request for the next lu_dirpage gets the page
 *   labeled 'next CFS_PAGE'.
 *
 * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
 *   a hash collision with the next page exists.
 *
 * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
 *   to the first entry of the next lu_dirpage.
 */
#if PAGE_CACHE_SIZE > LU_PAGE_SIZE
static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
{
	int i;

	for (i = 0; i < ncfspgs; i++) {
		struct lu_dirpage *dp = kmap(pages[i]);
		struct lu_dirpage *first = dp;
		struct lu_dirent *end_dirent = NULL;
		struct lu_dirent *ent;
		__u64 hash_end = dp->ldp_hash_end;
		__u32 flags = dp->ldp_flags;

		while (--nlupgs > 0) {
			ent = lu_dirent_start(dp);
			for (end_dirent = ent; ent;
			     end_dirent = ent, ent = lu_dirent_next(ent))
				;

			/* Advance dp to next lu_dirpage. */
			dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);

			/* Check if we've reached the end of the CFS_PAGE. */
			if (!((unsigned long)dp & ~CFS_PAGE_MASK))
				break;

			/* Save the hash and flags of this lu_dirpage. */
			hash_end = dp->ldp_hash_end;
			flags = dp->ldp_flags;

			/* Check if lu_dirpage contains no entries. */
			if (!end_dirent)
				break;

			/* Enlarge the end entry lde_reclen from 0 to
			 * first entry of next lu_dirpage.
			 */
			LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0);
			end_dirent->lde_reclen =
				cpu_to_le16((char *)(dp->ldp_entries) -
					    (char *)end_dirent);
		}

		first->ldp_hash_end = hash_end;
		first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
		first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);

		kunmap(pages[i]);
	}
	LASSERTF(nlupgs == 0, "left = %d", nlupgs);
}
#else
#define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0)
#endif	/* PAGE_CACHE_SIZE > LU_PAGE_SIZE */
2106 static int lmv_readpage(struct obd_export
*exp
, struct md_op_data
*op_data
,
2107 struct page
**pages
, struct ptlrpc_request
**request
)
2109 struct obd_device
*obd
= exp
->exp_obd
;
2110 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2111 __u64 offset
= op_data
->op_offset
;
2113 int ncfspgs
; /* pages read in PAGE_CACHE_SIZE */
2114 int nlupgs
; /* pages read in LU_PAGE_SIZE */
2115 struct lmv_tgt_desc
*tgt
;
2117 rc
= lmv_check_connect(obd
);
2121 CDEBUG(D_INODE
, "READPAGE at %#llx from "DFID
"\n",
2122 offset
, PFID(&op_data
->op_fid1
));
2124 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
2126 return PTR_ERR(tgt
);
2128 rc
= md_readpage(tgt
->ltd_exp
, op_data
, pages
, request
);
2132 ncfspgs
= ((*request
)->rq_bulk
->bd_nob_transferred
+ PAGE_CACHE_SIZE
- 1)
2133 >> PAGE_CACHE_SHIFT
;
2134 nlupgs
= (*request
)->rq_bulk
->bd_nob_transferred
>> LU_PAGE_SHIFT
;
2135 LASSERT(!((*request
)->rq_bulk
->bd_nob_transferred
& ~LU_PAGE_MASK
));
2136 LASSERT(ncfspgs
> 0 && ncfspgs
<= op_data
->op_npages
);
2138 CDEBUG(D_INODE
, "read %d(%d)/%d pages\n", ncfspgs
, nlupgs
,
2139 op_data
->op_npages
);
2141 lmv_adjust_dirpages(pages
, ncfspgs
, nlupgs
);
2146 static int lmv_unlink(struct obd_export
*exp
, struct md_op_data
*op_data
,
2147 struct ptlrpc_request
**request
)
2149 struct obd_device
*obd
= exp
->exp_obd
;
2150 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2151 struct lmv_tgt_desc
*tgt
= NULL
;
2152 struct mdt_body
*body
;
2155 rc
= lmv_check_connect(obd
);
2159 /* Send unlink requests to the MDT where the child is located */
2160 if (likely(!fid_is_zero(&op_data
->op_fid2
)))
2161 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid2
);
2163 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
2165 return PTR_ERR(tgt
);
2167 op_data
->op_fsuid
= from_kuid(&init_user_ns
, current_fsuid());
2168 op_data
->op_fsgid
= from_kgid(&init_user_ns
, current_fsgid());
2169 op_data
->op_cap
= cfs_curproc_cap_pack();
2172 * If child's fid is given, cancel unused locks for it if it is from
2173 * another export than parent.
2175 * LOOKUP lock for child (fid3) should also be cancelled on parent
2176 * tgt_tgt in mdc_unlink().
2178 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
| MF_MDC_CANCEL_FID3
;
2181 * Cancel FULL locks on child (fid3).
2183 rc
= lmv_early_cancel(exp
, op_data
, tgt
->ltd_idx
, LCK_EX
,
2184 MDS_INODELOCK_FULL
, MF_MDC_CANCEL_FID3
);
2189 CDEBUG(D_INODE
, "unlink with fid="DFID
"/"DFID
" -> mds #%d\n",
2190 PFID(&op_data
->op_fid1
), PFID(&op_data
->op_fid2
), tgt
->ltd_idx
);
2192 rc
= md_unlink(tgt
->ltd_exp
, op_data
, request
);
2193 if (rc
!= 0 && rc
!= -EREMOTE
)
2196 body
= req_capsule_server_get(&(*request
)->rq_pill
, &RMF_MDT_BODY
);
2200 /* Not cross-ref case, just get out of here. */
2201 if (likely(!(body
->valid
& OBD_MD_MDS
)))
2204 CDEBUG(D_INODE
, "%s: try unlink to another MDT for "DFID
"\n",
2205 exp
->exp_obd
->obd_name
, PFID(&body
->fid1
));
2207 /* This is a remote object, try remote MDT, Note: it may
2208 * try more than 1 time here, Considering following case
2209 * /mnt/lustre is root on MDT0, remote1 is on MDT1
2210 * 1. Initially A does not know where remote1 is, it send
2211 * unlink RPC to MDT0, MDT0 return -EREMOTE, it will
2212 * resend unlink RPC to MDT1 (retry 1st time).
2214 * 2. During the unlink RPC in flight,
2215 * client B mv /mnt/lustre/remote1 /mnt/lustre/remote2
2216 * and create new remote1, but on MDT0
2218 * 3. MDT1 get unlink RPC(from A), then do remote lock on
2219 * /mnt/lustre, then lookup get fid of remote1, and find
2220 * it is remote dir again, and replay -EREMOTE again.
2222 * 4. Then A will resend unlink RPC to MDT0. (retry 2nd times).
2224 * In theory, it might try unlimited time here, but it should
2225 * be very rare case.
2227 op_data
->op_fid2
= body
->fid1
;
2228 ptlrpc_req_finished(*request
);
2234 static int lmv_precleanup(struct obd_device
*obd
, enum obd_cleanup_stage stage
)
2236 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2239 case OBD_CLEANUP_EARLY
:
2240 /* XXX: here should be calling obd_precleanup() down to
2244 case OBD_CLEANUP_EXPORTS
:
2245 fld_client_debugfs_fini(&lmv
->lmv_fld
);
2246 lprocfs_obd_cleanup(obd
);
2254 static int lmv_get_info(const struct lu_env
*env
, struct obd_export
*exp
,
2255 __u32 keylen
, void *key
, __u32
*vallen
, void *val
,
2256 struct lov_stripe_md
*lsm
)
2258 struct obd_device
*obd
;
2259 struct lmv_obd
*lmv
;
2262 obd
= class_exp2obd(exp
);
2264 CDEBUG(D_IOCTL
, "Invalid client cookie %#llx\n",
2265 exp
->exp_handle
.h_cookie
);
2270 if (keylen
>= strlen("remote_flag") && !strcmp(key
, "remote_flag")) {
2271 struct lmv_tgt_desc
*tgt
;
2274 rc
= lmv_check_connect(obd
);
2278 LASSERT(*vallen
== sizeof(__u32
));
2279 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2282 * All tgts should be connected when this gets called.
2284 if (!tgt
|| !tgt
->ltd_exp
)
2287 if (!obd_get_info(env
, tgt
->ltd_exp
, keylen
, key
,
2292 } else if (KEY_IS(KEY_MAX_EASIZE
) ||
2293 KEY_IS(KEY_DEFAULT_EASIZE
) ||
2294 KEY_IS(KEY_CONN_DATA
)) {
2295 rc
= lmv_check_connect(obd
);
2300 * Forwarding this request to first MDS, it should know LOV
2303 rc
= obd_get_info(env
, lmv
->tgts
[0]->ltd_exp
, keylen
, key
,
2305 if (!rc
&& KEY_IS(KEY_CONN_DATA
))
2306 exp
->exp_connect_data
= *(struct obd_connect_data
*)val
;
2308 } else if (KEY_IS(KEY_TGT_COUNT
)) {
2309 *((int *)val
) = lmv
->desc
.ld_tgt_count
;
2313 CDEBUG(D_IOCTL
, "Invalid key\n");
2317 static int lmv_set_info_async(const struct lu_env
*env
, struct obd_export
*exp
,
2318 u32 keylen
, void *key
, u32 vallen
,
2319 void *val
, struct ptlrpc_request_set
*set
)
2321 struct lmv_tgt_desc
*tgt
;
2322 struct obd_device
*obd
;
2323 struct lmv_obd
*lmv
;
2326 obd
= class_exp2obd(exp
);
2328 CDEBUG(D_IOCTL
, "Invalid client cookie %#llx\n",
2329 exp
->exp_handle
.h_cookie
);
2334 if (KEY_IS(KEY_READ_ONLY
) || KEY_IS(KEY_FLUSH_CTX
)) {
2337 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2340 if (!tgt
|| !tgt
->ltd_exp
)
2343 err
= obd_set_info_async(env
, tgt
->ltd_exp
,
2344 keylen
, key
, vallen
, val
, set
);
2355 static int lmv_packmd(struct obd_export
*exp
, struct lov_mds_md
**lmmp
,
2356 struct lov_stripe_md
*lsm
)
2358 struct obd_device
*obd
= class_exp2obd(exp
);
2359 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2360 struct lmv_stripe_md
*meap
;
2361 struct lmv_stripe_md
*lsmp
;
2365 mea_size
= lmv_get_easize(lmv
);
2369 if (*lmmp
&& !lsm
) {
2376 *lmmp
= libcfs_kvzalloc(mea_size
, GFP_NOFS
);
2384 lsmp
= (struct lmv_stripe_md
*)lsm
;
2385 meap
= (struct lmv_stripe_md
*)*lmmp
;
2387 if (lsmp
->mea_magic
!= MEA_MAGIC_LAST_CHAR
&&
2388 lsmp
->mea_magic
!= MEA_MAGIC_ALL_CHARS
)
2391 meap
->mea_magic
= cpu_to_le32(lsmp
->mea_magic
);
2392 meap
->mea_count
= cpu_to_le32(lsmp
->mea_count
);
2393 meap
->mea_master
= cpu_to_le32(lsmp
->mea_master
);
2395 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2396 meap
->mea_ids
[i
] = lsmp
->mea_ids
[i
];
2397 fid_cpu_to_le(&meap
->mea_ids
[i
], &lsmp
->mea_ids
[i
]);
2403 static int lmv_unpackmd(struct obd_export
*exp
, struct lov_stripe_md
**lsmp
,
2404 struct lov_mds_md
*lmm
, int lmm_size
)
2406 struct obd_device
*obd
= class_exp2obd(exp
);
2407 struct lmv_stripe_md
**tmea
= (struct lmv_stripe_md
**)lsmp
;
2408 struct lmv_stripe_md
*mea
= (struct lmv_stripe_md
*)lmm
;
2409 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2414 mea_size
= lmv_get_easize(lmv
);
2418 if (*lsmp
&& !lmm
) {
2424 LASSERT(mea_size
== lmm_size
);
2426 *tmea
= libcfs_kvzalloc(mea_size
, GFP_NOFS
);
2433 if (mea
->mea_magic
== MEA_MAGIC_LAST_CHAR
||
2434 mea
->mea_magic
== MEA_MAGIC_ALL_CHARS
||
2435 mea
->mea_magic
== MEA_MAGIC_HASH_SEGMENT
) {
2436 magic
= le32_to_cpu(mea
->mea_magic
);
2439 * Old mea is not handled here.
2441 CERROR("Old not supportable EA is found\n");
2445 (*tmea
)->mea_magic
= magic
;
2446 (*tmea
)->mea_count
= le32_to_cpu(mea
->mea_count
);
2447 (*tmea
)->mea_master
= le32_to_cpu(mea
->mea_master
);
2449 for (i
= 0; i
< (*tmea
)->mea_count
; i
++) {
2450 (*tmea
)->mea_ids
[i
] = mea
->mea_ids
[i
];
2451 fid_le_to_cpu(&(*tmea
)->mea_ids
[i
], &(*tmea
)->mea_ids
[i
]);
2456 static int lmv_cancel_unused(struct obd_export
*exp
, const struct lu_fid
*fid
,
2457 ldlm_policy_data_t
*policy
, enum ldlm_mode mode
,
2458 enum ldlm_cancel_flags flags
, void *opaque
)
2460 struct obd_device
*obd
= exp
->exp_obd
;
2461 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2468 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2469 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
||
2470 lmv
->tgts
[i
]->ltd_active
== 0)
2473 err
= md_cancel_unused(lmv
->tgts
[i
]->ltd_exp
, fid
,
2474 policy
, mode
, flags
, opaque
);
2481 static int lmv_set_lock_data(struct obd_export
*exp
, __u64
*lockh
, void *data
,
2484 struct lmv_obd
*lmv
= &exp
->exp_obd
->u
.lmv
;
2487 rc
= md_set_lock_data(lmv
->tgts
[0]->ltd_exp
, lockh
, data
, bits
);
2491 static enum ldlm_mode
lmv_lock_match(struct obd_export
*exp
, __u64 flags
,
2492 const struct lu_fid
*fid
,
2493 enum ldlm_type type
,
2494 ldlm_policy_data_t
*policy
,
2495 enum ldlm_mode mode
,
2496 struct lustre_handle
*lockh
)
2498 struct obd_device
*obd
= exp
->exp_obd
;
2499 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2503 CDEBUG(D_INODE
, "Lock match for "DFID
"\n", PFID(fid
));
2506 * With CMD every object can have two locks in different namespaces:
2507 * lookup lock in space of mds storing direntry and update/open lock in
2508 * space of mds storing inode. Thus we check all targets, not only that
2509 * one fid was created in.
2511 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2512 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
||
2513 lmv
->tgts
[i
]->ltd_active
== 0)
2516 rc
= md_lock_match(lmv
->tgts
[i
]->ltd_exp
, flags
, fid
,
2517 type
, policy
, mode
, lockh
);
2525 static int lmv_get_lustre_md(struct obd_export
*exp
,
2526 struct ptlrpc_request
*req
,
2527 struct obd_export
*dt_exp
,
2528 struct obd_export
*md_exp
,
2529 struct lustre_md
*md
)
2531 struct lmv_obd
*lmv
= &exp
->exp_obd
->u
.lmv
;
2533 return md_get_lustre_md(lmv
->tgts
[0]->ltd_exp
, req
, dt_exp
, md_exp
, md
);
2536 static int lmv_free_lustre_md(struct obd_export
*exp
, struct lustre_md
*md
)
2538 struct obd_device
*obd
= exp
->exp_obd
;
2539 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2542 obd_free_memmd(exp
, (void *)&md
->mea
);
2543 return md_free_lustre_md(lmv
->tgts
[0]->ltd_exp
, md
);
2546 static int lmv_set_open_replay_data(struct obd_export
*exp
,
2547 struct obd_client_handle
*och
,
2548 struct lookup_intent
*it
)
2550 struct obd_device
*obd
= exp
->exp_obd
;
2551 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2552 struct lmv_tgt_desc
*tgt
;
2554 tgt
= lmv_find_target(lmv
, &och
->och_fid
);
2556 return PTR_ERR(tgt
);
2558 return md_set_open_replay_data(tgt
->ltd_exp
, och
, it
);
2561 static int lmv_clear_open_replay_data(struct obd_export
*exp
,
2562 struct obd_client_handle
*och
)
2564 struct obd_device
*obd
= exp
->exp_obd
;
2565 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2566 struct lmv_tgt_desc
*tgt
;
2568 tgt
= lmv_find_target(lmv
, &och
->och_fid
);
2570 return PTR_ERR(tgt
);
2572 return md_clear_open_replay_data(tgt
->ltd_exp
, och
);
2575 static int lmv_get_remote_perm(struct obd_export
*exp
,
2576 const struct lu_fid
*fid
,
2577 __u32 suppgid
, struct ptlrpc_request
**request
)
2579 struct obd_device
*obd
= exp
->exp_obd
;
2580 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2581 struct lmv_tgt_desc
*tgt
;
2584 rc
= lmv_check_connect(obd
);
2588 tgt
= lmv_find_target(lmv
, fid
);
2590 return PTR_ERR(tgt
);
2592 rc
= md_get_remote_perm(tgt
->ltd_exp
, fid
, suppgid
, request
);
2596 static int lmv_intent_getattr_async(struct obd_export
*exp
,
2597 struct md_enqueue_info
*minfo
,
2598 struct ldlm_enqueue_info
*einfo
)
2600 struct md_op_data
*op_data
= &minfo
->mi_data
;
2601 struct obd_device
*obd
= exp
->exp_obd
;
2602 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2603 struct lmv_tgt_desc
*tgt
= NULL
;
2606 rc
= lmv_check_connect(obd
);
2610 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
2612 return PTR_ERR(tgt
);
2614 rc
= md_intent_getattr_async(tgt
->ltd_exp
, minfo
, einfo
);
2618 static int lmv_revalidate_lock(struct obd_export
*exp
, struct lookup_intent
*it
,
2619 struct lu_fid
*fid
, __u64
*bits
)
2621 struct obd_device
*obd
= exp
->exp_obd
;
2622 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2623 struct lmv_tgt_desc
*tgt
;
2626 rc
= lmv_check_connect(obd
);
2630 tgt
= lmv_find_target(lmv
, fid
);
2632 return PTR_ERR(tgt
);
2634 rc
= md_revalidate_lock(tgt
->ltd_exp
, it
, fid
, bits
);
2639 * For lmv, only need to send request to master MDT, and the master MDT will
2640 * process with other slave MDTs. The only exception is Q_GETOQUOTA for which
2641 * we directly fetch data from the slave MDTs.
2643 static int lmv_quotactl(struct obd_device
*unused
, struct obd_export
*exp
,
2644 struct obd_quotactl
*oqctl
)
2646 struct obd_device
*obd
= class_exp2obd(exp
);
2647 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2648 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[0];
2650 __u64 curspace
, curinodes
;
2652 if (!lmv
->desc
.ld_tgt_count
|| !tgt
->ltd_active
) {
2653 CERROR("master lmv inactive\n");
2657 if (oqctl
->qc_cmd
!= Q_GETOQUOTA
) {
2658 rc
= obd_quotactl(tgt
->ltd_exp
, oqctl
);
2662 curspace
= curinodes
= 0;
2663 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2668 if (!tgt
|| !tgt
->ltd_exp
|| tgt
->ltd_active
== 0)
2670 if (!tgt
->ltd_active
) {
2671 CDEBUG(D_HA
, "mdt %d is inactive.\n", i
);
2675 err
= obd_quotactl(tgt
->ltd_exp
, oqctl
);
2677 CERROR("getquota on mdt %d failed. %d\n", i
, err
);
2681 curspace
+= oqctl
->qc_dqblk
.dqb_curspace
;
2682 curinodes
+= oqctl
->qc_dqblk
.dqb_curinodes
;
2685 oqctl
->qc_dqblk
.dqb_curspace
= curspace
;
2686 oqctl
->qc_dqblk
.dqb_curinodes
= curinodes
;
2691 static int lmv_quotacheck(struct obd_device
*unused
, struct obd_export
*exp
,
2692 struct obd_quotactl
*oqctl
)
2694 struct obd_device
*obd
= class_exp2obd(exp
);
2695 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2696 struct lmv_tgt_desc
*tgt
;
2699 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2703 if (!tgt
|| !tgt
->ltd_exp
|| !tgt
->ltd_active
) {
2704 CERROR("lmv idx %d inactive\n", i
);
2708 err
= obd_quotacheck(tgt
->ltd_exp
, oqctl
);
2716 static struct obd_ops lmv_obd_ops
= {
2717 .owner
= THIS_MODULE
,
2719 .cleanup
= lmv_cleanup
,
2720 .precleanup
= lmv_precleanup
,
2721 .process_config
= lmv_process_config
,
2722 .connect
= lmv_connect
,
2723 .disconnect
= lmv_disconnect
,
2724 .statfs
= lmv_statfs
,
2725 .get_info
= lmv_get_info
,
2726 .set_info_async
= lmv_set_info_async
,
2727 .packmd
= lmv_packmd
,
2728 .unpackmd
= lmv_unpackmd
,
2729 .notify
= lmv_notify
,
2730 .get_uuid
= lmv_get_uuid
,
2731 .iocontrol
= lmv_iocontrol
,
2732 .quotacheck
= lmv_quotacheck
,
2733 .quotactl
= lmv_quotactl
2736 static struct md_ops lmv_md_ops
= {
2737 .getstatus
= lmv_getstatus
,
2738 .null_inode
= lmv_null_inode
,
2739 .find_cbdata
= lmv_find_cbdata
,
2741 .create
= lmv_create
,
2742 .done_writing
= lmv_done_writing
,
2743 .enqueue
= lmv_enqueue
,
2744 .getattr
= lmv_getattr
,
2745 .getxattr
= lmv_getxattr
,
2746 .getattr_name
= lmv_getattr_name
,
2747 .intent_lock
= lmv_intent_lock
,
2749 .rename
= lmv_rename
,
2750 .setattr
= lmv_setattr
,
2751 .setxattr
= lmv_setxattr
,
2753 .readpage
= lmv_readpage
,
2754 .unlink
= lmv_unlink
,
2755 .init_ea_size
= lmv_init_ea_size
,
2756 .cancel_unused
= lmv_cancel_unused
,
2757 .set_lock_data
= lmv_set_lock_data
,
2758 .lock_match
= lmv_lock_match
,
2759 .get_lustre_md
= lmv_get_lustre_md
,
2760 .free_lustre_md
= lmv_free_lustre_md
,
2761 .set_open_replay_data
= lmv_set_open_replay_data
,
2762 .clear_open_replay_data
= lmv_clear_open_replay_data
,
2763 .get_remote_perm
= lmv_get_remote_perm
,
2764 .intent_getattr_async
= lmv_intent_getattr_async
,
2765 .revalidate_lock
= lmv_revalidate_lock
2768 static int __init
lmv_init(void)
2770 struct lprocfs_static_vars lvars
;
2773 lprocfs_lmv_init_vars(&lvars
);
2775 rc
= class_register_type(&lmv_obd_ops
, &lmv_md_ops
,
2776 LUSTRE_LMV_NAME
, NULL
);
2780 static void lmv_exit(void)
2782 class_unregister_type(LUSTRE_LMV_NAME
);
2785 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
2786 MODULE_DESCRIPTION("Lustre Logical Metadata Volume");
2787 MODULE_VERSION(LUSTRE_VERSION_STRING
);
2788 MODULE_LICENSE("GPL");
2790 module_init(lmv_init
);
2791 module_exit(lmv_exit
);