/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 */
/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 * Author: Andreas Dilger <adilger@clusterfs.com>
 */
39 #define DEBUG_SUBSYSTEM S_LLITE
40 #include "../include/lustre_dlm.h"
41 #include "../include/lustre_lite.h"
42 #include <linux/pagemap.h>
43 #include <linux/file.h>
44 #include <linux/sched.h>
45 #include <linux/mount.h>
46 #include "llite_internal.h"
47 #include "../include/lustre/ll_fiemap.h"
48 #include "../include/lustre/lustre_ioctl.h"
50 #include "../include/cl_object.h"
53 ll_put_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
);
55 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
58 static enum llioc_iter
59 ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
60 unsigned int cmd
, unsigned long arg
, int *rcp
);
62 static struct ll_file_data
*ll_file_data_get(void)
64 struct ll_file_data
*fd
;
66 fd
= kmem_cache_zalloc(ll_file_data_slab
, GFP_NOFS
);
69 fd
->fd_write_failed
= false;
73 static void ll_file_data_put(struct ll_file_data
*fd
)
76 kmem_cache_free(ll_file_data_slab
, fd
);
79 void ll_pack_inode2opdata(struct inode
*inode
, struct md_op_data
*op_data
,
80 struct lustre_handle
*fh
)
82 op_data
->op_fid1
= ll_i2info(inode
)->lli_fid
;
83 op_data
->op_attr
.ia_mode
= inode
->i_mode
;
84 op_data
->op_attr
.ia_atime
= inode
->i_atime
;
85 op_data
->op_attr
.ia_mtime
= inode
->i_mtime
;
86 op_data
->op_attr
.ia_ctime
= inode
->i_ctime
;
87 op_data
->op_attr
.ia_size
= i_size_read(inode
);
88 op_data
->op_attr_blocks
= inode
->i_blocks
;
89 op_data
->op_attr_flags
= ll_inode_to_ext_flags(inode
->i_flags
);
90 op_data
->op_ioepoch
= ll_i2info(inode
)->lli_ioepoch
;
92 op_data
->op_handle
= *fh
;
94 if (ll_i2info(inode
)->lli_flags
& LLIF_DATA_MODIFIED
)
95 op_data
->op_bias
|= MDS_DATA_MODIFIED
;
99 * Closes the IO epoch and packs all the attributes into @op_data for
102 static void ll_prepare_close(struct inode
*inode
, struct md_op_data
*op_data
,
103 struct obd_client_handle
*och
)
105 op_data
->op_attr
.ia_valid
= ATTR_MODE
| ATTR_ATIME
| ATTR_ATIME_SET
|
106 ATTR_MTIME
| ATTR_MTIME_SET
|
107 ATTR_CTIME
| ATTR_CTIME_SET
;
109 if (!(och
->och_flags
& FMODE_WRITE
))
112 if (!exp_connect_som(ll_i2mdexp(inode
)) || !S_ISREG(inode
->i_mode
))
113 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
115 ll_ioepoch_close(inode
, op_data
, &och
, 0);
118 ll_pack_inode2opdata(inode
, op_data
, &och
->och_fh
);
119 ll_prep_md_op_data(op_data
, inode
, NULL
, NULL
,
120 0, 0, LUSTRE_OPC_ANY
, NULL
);
123 static int ll_close_inode_openhandle(struct obd_export
*md_exp
,
125 struct obd_client_handle
*och
,
126 const __u64
*data_version
)
128 struct obd_export
*exp
= ll_i2mdexp(inode
);
129 struct md_op_data
*op_data
;
130 struct ptlrpc_request
*req
= NULL
;
131 struct obd_device
*obd
= class_exp2obd(exp
);
137 * XXX: in case of LMV, is this correct to access
140 CERROR("Invalid MDC connection handle %#llx\n",
141 ll_i2mdexp(inode
)->exp_handle
.h_cookie
);
146 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
148 /* XXX We leak openhandle and request here. */
153 ll_prepare_close(inode
, op_data
, och
);
155 /* Pass in data_version implies release. */
156 op_data
->op_bias
|= MDS_HSM_RELEASE
;
157 op_data
->op_data_version
= *data_version
;
158 op_data
->op_lease_handle
= och
->och_lease_handle
;
159 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
161 epoch_close
= op_data
->op_flags
& MF_EPOCH_CLOSE
;
162 rc
= md_close(md_exp
, op_data
, och
->och_mod
, &req
);
164 /* This close must have the epoch closed. */
165 LASSERT(epoch_close
);
166 /* MDS has instructed us to obtain Size-on-MDS attribute from
167 * OSTs and send setattr to back to MDS.
169 rc
= ll_som_update(inode
, op_data
);
171 CERROR("%s: inode "DFID
" mdc Size-on-MDS update failed: rc = %d\n",
172 ll_i2mdexp(inode
)->exp_obd
->obd_name
,
173 PFID(ll_inode2fid(inode
)), rc
);
177 CERROR("%s: inode "DFID
" mdc close failed: rc = %d\n",
178 ll_i2mdexp(inode
)->exp_obd
->obd_name
,
179 PFID(ll_inode2fid(inode
)), rc
);
182 /* DATA_MODIFIED flag was successfully sent on close, cancel data
185 if (rc
== 0 && (op_data
->op_bias
& MDS_DATA_MODIFIED
)) {
186 struct ll_inode_info
*lli
= ll_i2info(inode
);
188 spin_lock(&lli
->lli_lock
);
189 lli
->lli_flags
&= ~LLIF_DATA_MODIFIED
;
190 spin_unlock(&lli
->lli_lock
);
194 rc
= ll_objects_destroy(req
, inode
);
196 CERROR("inode %lu ll_objects destroy: rc = %d\n",
199 if (rc
== 0 && op_data
->op_bias
& MDS_HSM_RELEASE
) {
200 struct mdt_body
*body
;
202 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
203 if (!(body
->mbo_valid
& OBD_MD_FLRELEASED
))
207 ll_finish_md_op_data(op_data
);
210 if (exp_connect_som(exp
) && !epoch_close
&&
211 S_ISREG(inode
->i_mode
) && (och
->och_flags
& FMODE_WRITE
)) {
212 ll_queue_done_writing(inode
, LLIF_DONE_WRITING
);
214 md_clear_open_replay_data(md_exp
, och
);
215 /* Free @och if it is not waiting for DONE_WRITING. */
216 och
->och_fh
.cookie
= DEAD_HANDLE_MAGIC
;
219 if (req
) /* This is close request */
220 ptlrpc_req_finished(req
);
224 int ll_md_real_close(struct inode
*inode
, fmode_t fmode
)
226 struct ll_inode_info
*lli
= ll_i2info(inode
);
227 struct obd_client_handle
**och_p
;
228 struct obd_client_handle
*och
;
232 if (fmode
& FMODE_WRITE
) {
233 och_p
= &lli
->lli_mds_write_och
;
234 och_usecount
= &lli
->lli_open_fd_write_count
;
235 } else if (fmode
& FMODE_EXEC
) {
236 och_p
= &lli
->lli_mds_exec_och
;
237 och_usecount
= &lli
->lli_open_fd_exec_count
;
239 LASSERT(fmode
& FMODE_READ
);
240 och_p
= &lli
->lli_mds_read_och
;
241 och_usecount
= &lli
->lli_open_fd_read_count
;
244 mutex_lock(&lli
->lli_och_mutex
);
245 if (*och_usecount
> 0) {
246 /* There are still users of this handle, so skip
249 mutex_unlock(&lli
->lli_och_mutex
);
255 mutex_unlock(&lli
->lli_och_mutex
);
258 /* There might be a race and this handle may already
261 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
268 static int ll_md_close(struct obd_export
*md_exp
, struct inode
*inode
,
271 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
272 struct ll_inode_info
*lli
= ll_i2info(inode
);
274 __u64 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_TEST_LOCK
;
275 struct lustre_handle lockh
;
276 ldlm_policy_data_t policy
= {.l_inodebits
= {MDS_INODELOCK_OPEN
} };
279 /* clear group lock, if present */
280 if (unlikely(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
))
281 ll_put_grouplock(inode
, file
, fd
->fd_grouplock
.lg_gid
);
283 if (fd
->fd_lease_och
) {
286 /* Usually the lease is not released when the
287 * application crashed, we need to release here.
289 rc
= ll_lease_close(fd
->fd_lease_och
, inode
, &lease_broken
);
290 CDEBUG(rc
? D_ERROR
: D_INODE
,
291 "Clean up lease " DFID
" %d/%d\n",
292 PFID(&lli
->lli_fid
), rc
, lease_broken
);
294 fd
->fd_lease_och
= NULL
;
298 rc
= ll_close_inode_openhandle(md_exp
, inode
, fd
->fd_och
, NULL
);
303 /* Let's see if we have good enough OPEN lock on the file and if
304 * we can skip talking to MDS
307 mutex_lock(&lli
->lli_och_mutex
);
308 if (fd
->fd_omode
& FMODE_WRITE
) {
310 LASSERT(lli
->lli_open_fd_write_count
);
311 lli
->lli_open_fd_write_count
--;
312 } else if (fd
->fd_omode
& FMODE_EXEC
) {
314 LASSERT(lli
->lli_open_fd_exec_count
);
315 lli
->lli_open_fd_exec_count
--;
318 LASSERT(lli
->lli_open_fd_read_count
);
319 lli
->lli_open_fd_read_count
--;
321 mutex_unlock(&lli
->lli_och_mutex
);
323 if (!md_lock_match(md_exp
, flags
, ll_inode2fid(inode
),
324 LDLM_IBITS
, &policy
, lockmode
, &lockh
))
325 rc
= ll_md_real_close(inode
, fd
->fd_omode
);
328 LUSTRE_FPRIVATE(file
) = NULL
;
329 ll_file_data_put(fd
);
334 /* While this returns an error code, fput() the caller does not, so we need
335 * to make every effort to clean up all of our state here. Also, applications
336 * rarely check close errors and even if an error is returned they will not
337 * re-try the close call.
339 int ll_file_release(struct inode
*inode
, struct file
*file
)
341 struct ll_file_data
*fd
;
342 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
343 struct ll_inode_info
*lli
= ll_i2info(inode
);
346 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p)\n",
347 PFID(ll_inode2fid(inode
)), inode
);
349 if (!is_root_inode(inode
))
350 ll_stats_ops_tally(sbi
, LPROC_LL_RELEASE
, 1);
351 fd
= LUSTRE_FPRIVATE(file
);
354 /* The last ref on @file, maybe not be the owner pid of statahead.
355 * Different processes can open the same dir, "ll_opendir_key" means:
356 * it is me that should stop the statahead thread.
358 if (S_ISDIR(inode
->i_mode
) && lli
->lli_opendir_key
== fd
&&
359 lli
->lli_opendir_pid
!= 0)
360 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
362 if (is_root_inode(inode
)) {
363 LUSTRE_FPRIVATE(file
) = NULL
;
364 ll_file_data_put(fd
);
368 if (!S_ISDIR(inode
->i_mode
)) {
370 lov_read_and_clear_async_rc(lli
->lli_clob
);
371 lli
->lli_async_rc
= 0;
374 rc
= ll_md_close(sbi
->ll_md_exp
, inode
, file
);
376 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG
, cfs_fail_val
))
377 libcfs_debug_dumplog();
382 static int ll_intent_file_open(struct dentry
*de
, void *lmm
, int lmmsize
,
383 struct lookup_intent
*itp
)
385 struct inode
*inode
= d_inode(de
);
386 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
387 struct dentry
*parent
= de
->d_parent
;
388 const char *name
= NULL
;
389 struct md_op_data
*op_data
;
390 struct ptlrpc_request
*req
= NULL
;
394 LASSERT(itp
->it_flags
& MDS_OPEN_BY_FID
);
397 * if server supports open-by-fid, or file name is invalid, don't pack
398 * name in open request
400 if (!(exp_connect_flags(sbi
->ll_md_exp
) & OBD_CONNECT_OPEN_BY_FID
) &&
401 lu_name_is_valid_2(de
->d_name
.name
, de
->d_name
.len
)) {
402 name
= de
->d_name
.name
;
403 len
= de
->d_name
.len
;
406 op_data
= ll_prep_md_op_data(NULL
, d_inode(parent
), inode
, name
, len
,
407 O_RDWR
, LUSTRE_OPC_ANY
, NULL
);
409 return PTR_ERR(op_data
);
410 op_data
->op_data
= lmm
;
411 op_data
->op_data_size
= lmmsize
;
413 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, itp
, &req
,
414 &ll_md_blocking_ast
, 0);
415 ll_finish_md_op_data(op_data
);
417 /* reason for keep own exit path - don`t flood log
418 * with messages with -ESTALE errors.
420 if (!it_disposition(itp
, DISP_OPEN_OPEN
) ||
421 it_open_error(DISP_OPEN_OPEN
, itp
))
423 ll_release_openhandle(inode
, itp
);
427 if (it_disposition(itp
, DISP_LOOKUP_NEG
)) {
432 if (rc
!= 0 || it_open_error(DISP_OPEN_OPEN
, itp
)) {
433 rc
= rc
? rc
: it_open_error(DISP_OPEN_OPEN
, itp
);
434 CDEBUG(D_VFSTRACE
, "lock enqueue: err: %d\n", rc
);
438 rc
= ll_prep_inode(&inode
, req
, NULL
, itp
);
439 if (!rc
&& itp
->it_lock_mode
)
440 ll_set_lock_data(sbi
->ll_md_exp
, inode
, itp
, NULL
);
443 ptlrpc_req_finished(req
);
444 ll_intent_drop_lock(itp
);
450 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
451 * not believe attributes if a few ioepoch holders exist. Attributes for
452 * previous ioepoch if new one is opened are also skipped by MDS.
454 void ll_ioepoch_open(struct ll_inode_info
*lli
, __u64 ioepoch
)
456 if (ioepoch
&& lli
->lli_ioepoch
!= ioepoch
) {
457 lli
->lli_ioepoch
= ioepoch
;
458 CDEBUG(D_INODE
, "Epoch %llu opened on "DFID
"\n",
459 ioepoch
, PFID(&lli
->lli_fid
));
463 static int ll_och_fill(struct obd_export
*md_exp
, struct lookup_intent
*it
,
464 struct obd_client_handle
*och
)
466 struct mdt_body
*body
;
468 body
= req_capsule_server_get(&it
->it_request
->rq_pill
, &RMF_MDT_BODY
);
469 och
->och_fh
= body
->mbo_handle
;
470 och
->och_fid
= body
->mbo_fid1
;
471 och
->och_lease_handle
.cookie
= it
->it_lock_handle
;
472 och
->och_magic
= OBD_CLIENT_HANDLE_MAGIC
;
473 och
->och_flags
= it
->it_flags
;
475 return md_set_open_replay_data(md_exp
, och
, it
);
478 static int ll_local_open(struct file
*file
, struct lookup_intent
*it
,
479 struct ll_file_data
*fd
, struct obd_client_handle
*och
)
481 struct inode
*inode
= file_inode(file
);
482 struct ll_inode_info
*lli
= ll_i2info(inode
);
484 LASSERT(!LUSTRE_FPRIVATE(file
));
489 struct mdt_body
*body
;
492 rc
= ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
496 body
= req_capsule_server_get(&it
->it_request
->rq_pill
,
498 ll_ioepoch_open(lli
, body
->mbo_ioepoch
);
501 LUSTRE_FPRIVATE(file
) = fd
;
502 ll_readahead_init(inode
, &fd
->fd_ras
);
503 fd
->fd_omode
= it
->it_flags
& (FMODE_READ
| FMODE_WRITE
| FMODE_EXEC
);
505 /* ll_cl_context initialize */
506 rwlock_init(&fd
->fd_lock
);
507 INIT_LIST_HEAD(&fd
->fd_lccs
);
512 /* Open a file, and (for the very first open) create objects on the OSTs at
513 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
514 * creation or open until ll_lov_setstripe() ioctl is called.
516 * If we already have the stripe MD locally then we don't request it in
517 * md_open(), by passing a lmm_size = 0.
519 * It is up to the application to ensure no other processes open this file
520 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
521 * used. We might be able to avoid races of that sort by getting lli_open_sem
522 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
523 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
525 int ll_file_open(struct inode
*inode
, struct file
*file
)
527 struct ll_inode_info
*lli
= ll_i2info(inode
);
528 struct lookup_intent
*it
, oit
= { .it_op
= IT_OPEN
,
529 .it_flags
= file
->f_flags
};
530 struct obd_client_handle
**och_p
= NULL
;
531 __u64
*och_usecount
= NULL
;
532 struct ll_file_data
*fd
;
533 int rc
= 0, opendir_set
= 0;
535 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), flags %o\n",
536 PFID(ll_inode2fid(inode
)), inode
, file
->f_flags
);
538 it
= file
->private_data
; /* XXX: compat macro */
539 file
->private_data
= NULL
; /* prevent ll_local_open assertion */
541 fd
= ll_file_data_get();
548 if (S_ISDIR(inode
->i_mode
)) {
549 spin_lock(&lli
->lli_sa_lock
);
550 if (!lli
->lli_opendir_key
&& !lli
->lli_sai
&&
551 lli
->lli_opendir_pid
== 0) {
552 lli
->lli_opendir_key
= fd
;
553 lli
->lli_opendir_pid
= current_pid();
556 spin_unlock(&lli
->lli_sa_lock
);
559 if (is_root_inode(inode
)) {
560 LUSTRE_FPRIVATE(file
) = fd
;
564 if (!it
|| !it
->it_disposition
) {
565 /* Convert f_flags into access mode. We cannot use file->f_mode,
566 * because everything but O_ACCMODE mask was stripped from
569 if ((oit
.it_flags
+ 1) & O_ACCMODE
)
571 if (file
->f_flags
& O_TRUNC
)
572 oit
.it_flags
|= FMODE_WRITE
;
574 /* kernel only call f_op->open in dentry_open. filp_open calls
575 * dentry_open after call to open_namei that checks permissions.
576 * Only nfsd_open call dentry_open directly without checking
577 * permissions and because of that this code below is safe.
579 if (oit
.it_flags
& (FMODE_WRITE
| FMODE_READ
))
580 oit
.it_flags
|= MDS_OPEN_OWNEROVERRIDE
;
582 /* We do not want O_EXCL here, presumably we opened the file
583 * already? XXX - NFS implications?
585 oit
.it_flags
&= ~O_EXCL
;
587 /* bug20584, if "it_flags" contains O_CREAT, the file will be
588 * created if necessary, then "IT_CREAT" should be set to keep
591 if (oit
.it_flags
& O_CREAT
)
592 oit
.it_op
|= IT_CREAT
;
598 /* Let's see if we have file open on MDS already. */
599 if (it
->it_flags
& FMODE_WRITE
) {
600 och_p
= &lli
->lli_mds_write_och
;
601 och_usecount
= &lli
->lli_open_fd_write_count
;
602 } else if (it
->it_flags
& FMODE_EXEC
) {
603 och_p
= &lli
->lli_mds_exec_och
;
604 och_usecount
= &lli
->lli_open_fd_exec_count
;
606 och_p
= &lli
->lli_mds_read_och
;
607 och_usecount
= &lli
->lli_open_fd_read_count
;
610 mutex_lock(&lli
->lli_och_mutex
);
611 if (*och_p
) { /* Open handle is present */
612 if (it_disposition(it
, DISP_OPEN_OPEN
)) {
613 /* Well, there's extra open request that we do not need,
614 * let's close it somehow. This will decref request.
616 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
618 mutex_unlock(&lli
->lli_och_mutex
);
622 ll_release_openhandle(inode
, it
);
626 rc
= ll_local_open(file
, it
, fd
, NULL
);
629 mutex_unlock(&lli
->lli_och_mutex
);
633 LASSERT(*och_usecount
== 0);
634 if (!it
->it_disposition
) {
635 /* We cannot just request lock handle now, new ELC code
636 * means that one of other OPEN locks for this file
637 * could be cancelled, and since blocking ast handler
638 * would attempt to grab och_mutex as well, that would
639 * result in a deadlock
641 mutex_unlock(&lli
->lli_och_mutex
);
643 * Normally called under two situations:
645 * 2. revalidate with IT_OPEN (revalidate doesn't
646 * execute this intent any more).
648 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
650 * Always specify MDS_OPEN_BY_FID because we don't want
651 * to get file with different fid.
653 it
->it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
;
654 rc
= ll_intent_file_open(file
->f_path
.dentry
, NULL
, 0, it
);
660 *och_p
= kzalloc(sizeof(struct obd_client_handle
), GFP_NOFS
);
668 /* md_intent_lock() didn't get a request ref if there was an
669 * open error, so don't do cleanup on the request here
672 /* XXX (green): Should not we bail out on any error here, not
675 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
679 LASSERTF(it_disposition(it
, DISP_ENQ_OPEN_REF
),
680 "inode %p: disposition %x, status %d\n", inode
,
681 it_disposition(it
, ~0), it
->it_status
);
683 rc
= ll_local_open(file
, it
, fd
, *och_p
);
687 mutex_unlock(&lli
->lli_och_mutex
);
690 /* Must do this outside lli_och_mutex lock to prevent deadlock where
691 * different kind of OPEN lock for this same inode gets cancelled
694 if (!S_ISREG(inode
->i_mode
))
697 if (!lli
->lli_has_smd
&&
698 (cl_is_lov_delay_create(file
->f_flags
) ||
699 (file
->f_mode
& FMODE_WRITE
) == 0)) {
700 CDEBUG(D_INODE
, "object creation was delayed\n");
703 cl_lov_delay_create_clear(&file
->f_flags
);
708 if (och_p
&& *och_p
) {
713 mutex_unlock(&lli
->lli_och_mutex
);
716 if (opendir_set
!= 0)
717 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
718 ll_file_data_put(fd
);
720 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_OPEN
, 1);
723 if (it
&& it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
724 ptlrpc_req_finished(it
->it_request
);
725 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
731 static int ll_md_blocking_lease_ast(struct ldlm_lock
*lock
,
732 struct ldlm_lock_desc
*desc
,
733 void *data
, int flag
)
736 struct lustre_handle lockh
;
739 case LDLM_CB_BLOCKING
:
740 ldlm_lock2handle(lock
, &lockh
);
741 rc
= ldlm_cli_cancel(&lockh
, LCF_ASYNC
);
743 CDEBUG(D_INODE
, "ldlm_cli_cancel: %d\n", rc
);
747 case LDLM_CB_CANCELING
:
755 * Acquire a lease and open the file.
757 static struct obd_client_handle
*
758 ll_lease_open(struct inode
*inode
, struct file
*file
, fmode_t fmode
,
761 struct lookup_intent it
= { .it_op
= IT_OPEN
};
762 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
763 struct md_op_data
*op_data
;
764 struct ptlrpc_request
*req
= NULL
;
765 struct lustre_handle old_handle
= { 0 };
766 struct obd_client_handle
*och
= NULL
;
770 if (fmode
!= FMODE_WRITE
&& fmode
!= FMODE_READ
)
771 return ERR_PTR(-EINVAL
);
774 struct ll_inode_info
*lli
= ll_i2info(inode
);
775 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
776 struct obd_client_handle
**och_p
;
779 if (!(fmode
& file
->f_mode
) || (file
->f_mode
& FMODE_EXEC
))
780 return ERR_PTR(-EPERM
);
782 /* Get the openhandle of the file */
784 mutex_lock(&lli
->lli_och_mutex
);
785 if (fd
->fd_lease_och
) {
786 mutex_unlock(&lli
->lli_och_mutex
);
791 if (file
->f_mode
& FMODE_WRITE
) {
792 LASSERT(lli
->lli_mds_write_och
);
793 och_p
= &lli
->lli_mds_write_och
;
794 och_usecount
= &lli
->lli_open_fd_write_count
;
796 LASSERT(lli
->lli_mds_read_och
);
797 och_p
= &lli
->lli_mds_read_och
;
798 och_usecount
= &lli
->lli_open_fd_read_count
;
800 if (*och_usecount
== 1) {
807 mutex_unlock(&lli
->lli_och_mutex
);
808 if (rc
< 0) /* more than 1 opener */
812 old_handle
= fd
->fd_och
->och_fh
;
815 och
= kzalloc(sizeof(*och
), GFP_NOFS
);
817 return ERR_PTR(-ENOMEM
);
819 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
, 0, 0,
820 LUSTRE_OPC_ANY
, NULL
);
821 if (IS_ERR(op_data
)) {
822 rc
= PTR_ERR(op_data
);
826 /* To tell the MDT this openhandle is from the same owner */
827 op_data
->op_handle
= old_handle
;
829 it
.it_flags
= fmode
| open_flags
;
830 it
.it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
| MDS_OPEN_LEASE
;
831 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, &it
, &req
,
832 &ll_md_blocking_lease_ast
,
833 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
834 * it can be cancelled which may mislead applications that the lease is
836 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
837 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
838 * doesn't deal with openhandle, so normal openhandle will be leaked.
840 LDLM_FL_NO_LRU
| LDLM_FL_EXCL
);
841 ll_finish_md_op_data(op_data
);
842 ptlrpc_req_finished(req
);
846 if (it_disposition(&it
, DISP_LOOKUP_NEG
)) {
851 rc
= it_open_error(DISP_OPEN_OPEN
, &it
);
855 LASSERT(it_disposition(&it
, DISP_ENQ_OPEN_REF
));
856 ll_och_fill(sbi
->ll_md_exp
, &it
, och
);
858 if (!it_disposition(&it
, DISP_OPEN_LEASE
)) /* old server? */ {
863 /* already get lease, handle lease lock */
864 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
865 if (it
.it_lock_mode
== 0 ||
866 it
.it_lock_bits
!= MDS_INODELOCK_OPEN
) {
867 /* open lock must return for lease */
868 CERROR(DFID
"lease granted but no open lock, %d/%llu.\n",
869 PFID(ll_inode2fid(inode
)), it
.it_lock_mode
,
875 ll_intent_release(&it
);
879 /* Cancel open lock */
880 if (it
.it_lock_mode
!= 0) {
881 ldlm_lock_decref_and_cancel(&och
->och_lease_handle
,
884 och
->och_lease_handle
.cookie
= 0ULL;
886 rc2
= ll_close_inode_openhandle(sbi
->ll_md_exp
, inode
, och
, NULL
);
888 CERROR("%s: error closing file "DFID
": %d\n",
889 ll_get_fsname(inode
->i_sb
, NULL
, 0),
890 PFID(&ll_i2info(inode
)->lli_fid
), rc2
);
891 och
= NULL
; /* och has been freed in ll_close_inode_openhandle() */
893 ll_intent_release(&it
);
900 * Release lease and close the file.
901 * It will check if the lease has ever broken.
903 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
906 struct ldlm_lock
*lock
;
907 bool cancelled
= true;
910 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
912 lock_res_and_lock(lock
);
913 cancelled
= ldlm_is_cancel(lock
);
914 unlock_res_and_lock(lock
);
918 CDEBUG(D_INODE
, "lease for " DFID
" broken? %d\n",
919 PFID(&ll_i2info(inode
)->lli_fid
), cancelled
);
922 ldlm_cli_cancel(&och
->och_lease_handle
, 0);
924 *lease_broken
= cancelled
;
926 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
931 /* Fills the obdo with the attributes for the lsm */
932 static int ll_lsm_getattr(struct lov_stripe_md
*lsm
, struct obd_export
*exp
,
933 struct obdo
*obdo
, __u64 ioepoch
, int dv_flags
)
935 struct ptlrpc_request_set
*set
;
936 struct obd_info oinfo
= { };
943 oinfo
.oi_oa
->o_oi
= lsm
->lsm_oi
;
944 oinfo
.oi_oa
->o_mode
= S_IFREG
;
945 oinfo
.oi_oa
->o_ioepoch
= ioepoch
;
946 oinfo
.oi_oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLTYPE
|
947 OBD_MD_FLSIZE
| OBD_MD_FLBLOCKS
|
948 OBD_MD_FLBLKSZ
| OBD_MD_FLATIME
|
949 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
|
950 OBD_MD_FLGROUP
| OBD_MD_FLEPOCH
|
951 OBD_MD_FLDATAVERSION
;
952 if (dv_flags
& (LL_DV_WR_FLUSH
| LL_DV_RD_FLUSH
)) {
953 oinfo
.oi_oa
->o_valid
|= OBD_MD_FLFLAGS
;
954 oinfo
.oi_oa
->o_flags
|= OBD_FL_SRVLOCK
;
955 if (dv_flags
& LL_DV_WR_FLUSH
)
956 oinfo
.oi_oa
->o_flags
|= OBD_FL_FLUSH
;
959 set
= ptlrpc_prep_set();
961 CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM
);
964 rc
= obd_getattr_async(exp
, &oinfo
, set
);
966 rc
= ptlrpc_set_wait(set
);
967 ptlrpc_set_destroy(set
);
970 oinfo
.oi_oa
->o_valid
&= (OBD_MD_FLBLOCKS
| OBD_MD_FLBLKSZ
|
971 OBD_MD_FLATIME
| OBD_MD_FLMTIME
|
972 OBD_MD_FLCTIME
| OBD_MD_FLSIZE
|
973 OBD_MD_FLDATAVERSION
| OBD_MD_FLFLAGS
);
974 if (dv_flags
& LL_DV_WR_FLUSH
&&
975 !(oinfo
.oi_oa
->o_valid
& OBD_MD_FLFLAGS
&&
976 oinfo
.oi_oa
->o_flags
& OBD_FL_FLUSH
))
983 * Performs the getattr on the inode and updates its fields.
984 * If @sync != 0, perform the getattr under the server-side lock.
986 int ll_inode_getattr(struct inode
*inode
, struct obdo
*obdo
,
987 __u64 ioepoch
, int sync
)
989 struct lov_stripe_md
*lsm
;
992 lsm
= ccc_inode_lsm_get(inode
);
993 rc
= ll_lsm_getattr(lsm
, ll_i2dtexp(inode
),
994 obdo
, ioepoch
, sync
? LL_DV_RD_FLUSH
: 0);
996 struct ost_id
*oi
= lsm
? &lsm
->lsm_oi
: &obdo
->o_oi
;
998 obdo_refresh_inode(inode
, obdo
, obdo
->o_valid
);
999 CDEBUG(D_INODE
, "objid " DOSTID
" size %llu, blocks %llu, blksize %lu\n",
1000 POSTID(oi
), i_size_read(inode
),
1001 (unsigned long long)inode
->i_blocks
,
1002 1UL << inode
->i_blkbits
);
1004 ccc_inode_lsm_put(inode
, lsm
);
1008 int ll_merge_attr(const struct lu_env
*env
, struct inode
*inode
)
1010 struct ll_inode_info
*lli
= ll_i2info(inode
);
1011 struct cl_object
*obj
= lli
->lli_clob
;
1012 struct cl_attr
*attr
= vvp_env_thread_attr(env
);
1018 ll_inode_size_lock(inode
);
1020 /* merge timestamps the most recently obtained from mds with
1021 * timestamps obtained from osts
1023 LTIME_S(inode
->i_atime
) = lli
->lli_atime
;
1024 LTIME_S(inode
->i_mtime
) = lli
->lli_mtime
;
1025 LTIME_S(inode
->i_ctime
) = lli
->lli_ctime
;
1027 mtime
= LTIME_S(inode
->i_mtime
);
1028 atime
= LTIME_S(inode
->i_atime
);
1029 ctime
= LTIME_S(inode
->i_ctime
);
1031 cl_object_attr_lock(obj
);
1032 rc
= cl_object_attr_get(env
, obj
, attr
);
1033 cl_object_attr_unlock(obj
);
1036 goto out_size_unlock
;
1038 if (atime
< attr
->cat_atime
)
1039 atime
= attr
->cat_atime
;
1041 if (ctime
< attr
->cat_ctime
)
1042 ctime
= attr
->cat_ctime
;
1044 if (mtime
< attr
->cat_mtime
)
1045 mtime
= attr
->cat_mtime
;
1047 CDEBUG(D_VFSTRACE
, DFID
" updating i_size %llu\n",
1048 PFID(&lli
->lli_fid
), attr
->cat_size
);
1050 i_size_write(inode
, attr
->cat_size
);
1052 inode
->i_blocks
= attr
->cat_blocks
;
1054 LTIME_S(inode
->i_mtime
) = mtime
;
1055 LTIME_S(inode
->i_atime
) = atime
;
1056 LTIME_S(inode
->i_ctime
) = ctime
;
1059 ll_inode_size_unlock(inode
);
1064 int ll_glimpse_ioctl(struct ll_sb_info
*sbi
, struct lov_stripe_md
*lsm
,
1067 struct obdo obdo
= { 0 };
1070 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, &obdo
, 0, 0);
1072 st
->st_size
= obdo
.o_size
;
1073 st
->st_blocks
= obdo
.o_blocks
;
1074 st
->st_mtime
= obdo
.o_mtime
;
1075 st
->st_atime
= obdo
.o_atime
;
1076 st
->st_ctime
= obdo
.o_ctime
;
1081 static bool file_is_noatime(const struct file
*file
)
1083 const struct vfsmount
*mnt
= file
->f_path
.mnt
;
1084 const struct inode
*inode
= file_inode(file
);
1086 /* Adapted from file_accessed() and touch_atime().*/
1087 if (file
->f_flags
& O_NOATIME
)
1090 if (inode
->i_flags
& S_NOATIME
)
1093 if (IS_NOATIME(inode
))
1096 if (mnt
->mnt_flags
& (MNT_NOATIME
| MNT_READONLY
))
1099 if ((mnt
->mnt_flags
& MNT_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1102 if ((inode
->i_sb
->s_flags
& MS_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1108 void ll_io_init(struct cl_io
*io
, const struct file
*file
, int write
)
1110 struct inode
*inode
= file_inode(file
);
1112 io
->u
.ci_rw
.crw_nonblock
= file
->f_flags
& O_NONBLOCK
;
1114 io
->u
.ci_wr
.wr_append
= !!(file
->f_flags
& O_APPEND
);
1115 io
->u
.ci_wr
.wr_sync
= file
->f_flags
& O_SYNC
||
1116 file
->f_flags
& O_DIRECT
||
1119 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
1120 io
->ci_lockreq
= CILR_MAYBE
;
1121 if (ll_file_nolock(file
)) {
1122 io
->ci_lockreq
= CILR_NEVER
;
1123 io
->ci_no_srvlock
= 1;
1124 } else if (file
->f_flags
& O_APPEND
) {
1125 io
->ci_lockreq
= CILR_MANDATORY
;
1128 io
->ci_noatime
= file_is_noatime(file
);
1132 ll_file_io_generic(const struct lu_env
*env
, struct vvp_io_args
*args
,
1133 struct file
*file
, enum cl_io_type iot
,
1134 loff_t
*ppos
, size_t count
)
1136 struct ll_inode_info
*lli
= ll_i2info(file_inode(file
));
1137 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1141 CDEBUG(D_VFSTRACE
, "file: %pD, type: %d ppos: %llu, count: %zd\n",
1142 file
, iot
, *ppos
, count
);
1145 io
= vvp_env_thread_io(env
);
1146 ll_io_init(io
, file
, iot
== CIT_WRITE
);
1148 if (cl_io_rw_init(env
, io
, iot
, *ppos
, count
) == 0) {
1149 struct vvp_io
*vio
= vvp_env_io(env
);
1150 int write_mutex_locked
= 0;
1152 vio
->vui_fd
= LUSTRE_FPRIVATE(file
);
1153 vio
->vui_io_subtype
= args
->via_io_subtype
;
1155 switch (vio
->vui_io_subtype
) {
1157 vio
->vui_iter
= args
->u
.normal
.via_iter
;
1158 vio
->vui_iocb
= args
->u
.normal
.via_iocb
;
1159 if ((iot
== CIT_WRITE
) &&
1160 !(vio
->vui_fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1161 if (mutex_lock_interruptible(&lli
->
1163 result
= -ERESTARTSYS
;
1166 write_mutex_locked
= 1;
1168 down_read(&lli
->lli_trunc_sem
);
1171 vio
->u
.splice
.vui_pipe
= args
->u
.splice
.via_pipe
;
1172 vio
->u
.splice
.vui_flags
= args
->u
.splice
.via_flags
;
1175 CERROR("Unknown IO type - %u\n", vio
->vui_io_subtype
);
1178 ll_cl_add(file
, env
, io
);
1179 result
= cl_io_loop(env
, io
);
1180 ll_cl_remove(file
, env
);
1181 if (args
->via_io_subtype
== IO_NORMAL
)
1182 up_read(&lli
->lli_trunc_sem
);
1183 if (write_mutex_locked
)
1184 mutex_unlock(&lli
->lli_write_mutex
);
1186 /* cl_io_rw_init() handled IO */
1187 result
= io
->ci_result
;
1190 if (io
->ci_nob
> 0) {
1191 result
= io
->ci_nob
;
1192 *ppos
= io
->u
.ci_wr
.wr
.crw_pos
;
1196 cl_io_fini(env
, io
);
1197 /* If any bit been read/written (result != 0), we just return
1198 * short read/write instead of restart io.
1200 if ((result
== 0 || result
== -ENODATA
) && io
->ci_need_restart
) {
1201 CDEBUG(D_VFSTRACE
, "Restart %s on %pD from %lld, count:%zd\n",
1202 iot
== CIT_READ
? "read" : "write",
1203 file
, *ppos
, count
);
1204 LASSERTF(io
->ci_nob
== 0, "%zd\n", io
->ci_nob
);
1208 if (iot
== CIT_READ
) {
1210 ll_stats_ops_tally(ll_i2sbi(file_inode(file
)),
1211 LPROC_LL_READ_BYTES
, result
);
1212 } else if (iot
== CIT_WRITE
) {
1214 ll_stats_ops_tally(ll_i2sbi(file_inode(file
)),
1215 LPROC_LL_WRITE_BYTES
, result
);
1216 fd
->fd_write_failed
= false;
1217 } else if (result
!= -ERESTARTSYS
) {
1218 fd
->fd_write_failed
= true;
1221 CDEBUG(D_VFSTRACE
, "iot: %d, result: %zd\n", iot
, result
);
1226 static ssize_t
ll_file_read_iter(struct kiocb
*iocb
, struct iov_iter
*to
)
1229 struct vvp_io_args
*args
;
1233 env
= cl_env_get(&refcheck
);
1235 return PTR_ERR(env
);
1237 args
= ll_env_args(env
, IO_NORMAL
);
1238 args
->u
.normal
.via_iter
= to
;
1239 args
->u
.normal
.via_iocb
= iocb
;
1241 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_READ
,
1242 &iocb
->ki_pos
, iov_iter_count(to
));
1243 cl_env_put(env
, &refcheck
);
1248 * Write to a file (through the page cache).
1250 static ssize_t
ll_file_write_iter(struct kiocb
*iocb
, struct iov_iter
*from
)
1253 struct vvp_io_args
*args
;
1257 env
= cl_env_get(&refcheck
);
1259 return PTR_ERR(env
);
1261 args
= ll_env_args(env
, IO_NORMAL
);
1262 args
->u
.normal
.via_iter
= from
;
1263 args
->u
.normal
.via_iocb
= iocb
;
1265 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_WRITE
,
1266 &iocb
->ki_pos
, iov_iter_count(from
));
1267 cl_env_put(env
, &refcheck
);
1272 * Send file content (through pagecache) somewhere with helper
1274 static ssize_t
ll_file_splice_read(struct file
*in_file
, loff_t
*ppos
,
1275 struct pipe_inode_info
*pipe
, size_t count
,
1279 struct vvp_io_args
*args
;
1283 env
= cl_env_get(&refcheck
);
1285 return PTR_ERR(env
);
1287 args
= ll_env_args(env
, IO_SPLICE
);
1288 args
->u
.splice
.via_pipe
= pipe
;
1289 args
->u
.splice
.via_flags
= flags
;
1291 result
= ll_file_io_generic(env
, args
, in_file
, CIT_READ
, ppos
, count
);
1292 cl_env_put(env
, &refcheck
);
1296 static int ll_lov_recreate(struct inode
*inode
, struct ost_id
*oi
, u32 ost_idx
)
1298 struct obd_export
*exp
= ll_i2dtexp(inode
);
1299 struct obd_trans_info oti
= { 0 };
1300 struct obdo
*oa
= NULL
;
1303 struct lov_stripe_md
*lsm
= NULL
, *lsm2
;
1305 oa
= kmem_cache_zalloc(obdo_cachep
, GFP_NOFS
);
1309 lsm
= ccc_inode_lsm_get(inode
);
1310 if (!lsm_has_objects(lsm
)) {
1315 lsm_size
= sizeof(*lsm
) + (sizeof(struct lov_oinfo
) *
1316 (lsm
->lsm_stripe_count
));
1318 lsm2
= libcfs_kvzalloc(lsm_size
, GFP_NOFS
);
1325 oa
->o_nlink
= ost_idx
;
1326 oa
->o_flags
|= OBD_FL_RECREATE_OBJS
;
1327 oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLFLAGS
| OBD_MD_FLGROUP
;
1328 obdo_from_inode(oa
, inode
, OBD_MD_FLTYPE
| OBD_MD_FLATIME
|
1329 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
);
1330 obdo_set_parent_fid(oa
, &ll_i2info(inode
)->lli_fid
);
1331 memcpy(lsm2
, lsm
, lsm_size
);
1332 ll_inode_size_lock(inode
);
1333 rc
= obd_create(NULL
, exp
, oa
, &lsm2
, &oti
);
1334 ll_inode_size_unlock(inode
);
1339 ccc_inode_lsm_put(inode
, lsm
);
1340 kmem_cache_free(obdo_cachep
, oa
);
1344 static int ll_lov_recreate_obj(struct inode
*inode
, unsigned long arg
)
1346 struct ll_recreate_obj ucreat
;
1349 if (!capable(CFS_CAP_SYS_ADMIN
))
1352 if (copy_from_user(&ucreat
, (struct ll_recreate_obj __user
*)arg
,
1356 ostid_set_seq_mdt0(&oi
);
1357 ostid_set_id(&oi
, ucreat
.lrc_id
);
1358 return ll_lov_recreate(inode
, &oi
, ucreat
.lrc_ost_idx
);
1361 static int ll_lov_recreate_fid(struct inode
*inode
, unsigned long arg
)
1367 if (!capable(CFS_CAP_SYS_ADMIN
))
1370 if (copy_from_user(&fid
, (struct lu_fid __user
*)arg
, sizeof(fid
)))
1373 fid_to_ostid(&fid
, &oi
);
1374 ost_idx
= (fid_seq(&fid
) >> 16) & 0xffff;
1375 return ll_lov_recreate(inode
, &oi
, ost_idx
);
1378 int ll_lov_setstripe_ea_info(struct inode
*inode
, struct dentry
*dentry
,
1379 __u64 flags
, struct lov_user_md
*lum
,
1382 struct lov_stripe_md
*lsm
= NULL
;
1383 struct lookup_intent oit
= {.it_op
= IT_OPEN
, .it_flags
= flags
};
1386 lsm
= ccc_inode_lsm_get(inode
);
1388 ccc_inode_lsm_put(inode
, lsm
);
1389 CDEBUG(D_IOCTL
, "stripe already exists for inode "DFID
"\n",
1390 PFID(ll_inode2fid(inode
)));
1395 ll_inode_size_lock(inode
);
1396 oit
.it_flags
|= MDS_OPEN_BY_FID
;
1397 rc
= ll_intent_file_open(dentry
, lum
, lum_size
, &oit
);
1404 ll_release_openhandle(inode
, &oit
);
1407 ll_inode_size_unlock(inode
);
1408 ll_intent_release(&oit
);
1409 ccc_inode_lsm_put(inode
, lsm
);
1413 ptlrpc_req_finished((struct ptlrpc_request
*)oit
.it_request
);
1417 int ll_lov_getstripe_ea_info(struct inode
*inode
, const char *filename
,
1418 struct lov_mds_md
**lmmp
, int *lmm_size
,
1419 struct ptlrpc_request
**request
)
1421 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1422 struct mdt_body
*body
;
1423 struct lov_mds_md
*lmm
= NULL
;
1424 struct ptlrpc_request
*req
= NULL
;
1425 struct md_op_data
*op_data
;
1428 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
1432 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, filename
,
1433 strlen(filename
), lmmsize
,
1434 LUSTRE_OPC_ANY
, NULL
);
1435 if (IS_ERR(op_data
))
1436 return PTR_ERR(op_data
);
1438 op_data
->op_valid
= OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
;
1439 rc
= md_getattr_name(sbi
->ll_md_exp
, op_data
, &req
);
1440 ll_finish_md_op_data(op_data
);
1442 CDEBUG(D_INFO
, "md_getattr_name failed on %s: rc %d\n",
1447 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1449 lmmsize
= body
->mbo_eadatasize
;
1451 if (!(body
->mbo_valid
& (OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
)) ||
1457 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_MDT_MD
, lmmsize
);
1459 if ((lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V1
)) &&
1460 (lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V3
))) {
1466 * This is coming from the MDS, so is probably in
1467 * little endian. We convert it to host endian before
1468 * passing it to userspace.
1470 if (cpu_to_le32(LOV_MAGIC
) != LOV_MAGIC
) {
1473 stripe_count
= le16_to_cpu(lmm
->lmm_stripe_count
);
1474 if (le32_to_cpu(lmm
->lmm_pattern
) & LOV_PATTERN_F_RELEASED
)
1477 /* if function called for directory - we should
1478 * avoid swab not existent lsm objects
1480 if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V1
)) {
1481 lustre_swab_lov_user_md_v1((struct lov_user_md_v1
*)lmm
);
1482 if (S_ISREG(body
->mbo_mode
))
1483 lustre_swab_lov_user_md_objects(
1484 ((struct lov_user_md_v1
*)lmm
)->lmm_objects
,
1486 } else if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V3
)) {
1487 lustre_swab_lov_user_md_v3((struct lov_user_md_v3
*)lmm
);
1488 if (S_ISREG(body
->mbo_mode
))
1489 lustre_swab_lov_user_md_objects(
1490 ((struct lov_user_md_v3
*)lmm
)->lmm_objects
,
1497 *lmm_size
= lmmsize
;
1502 static int ll_lov_setea(struct inode
*inode
, struct file
*file
,
1505 __u64 flags
= MDS_OPEN_HAS_OBJS
| FMODE_WRITE
;
1506 struct lov_user_md
*lump
;
1507 int lum_size
= sizeof(struct lov_user_md
) +
1508 sizeof(struct lov_user_ost_data
);
1511 if (!capable(CFS_CAP_SYS_ADMIN
))
1514 lump
= libcfs_kvzalloc(lum_size
, GFP_NOFS
);
1518 if (copy_from_user(lump
, (struct lov_user_md __user
*)arg
, lum_size
)) {
1523 rc
= ll_lov_setstripe_ea_info(inode
, file
->f_path
.dentry
, flags
, lump
,
1525 cl_lov_delay_create_clear(&file
->f_flags
);
1531 static int ll_lov_setstripe(struct inode
*inode
, struct file
*file
,
1534 struct lov_user_md_v3 lumv3
;
1535 struct lov_user_md_v1
*lumv1
= (struct lov_user_md_v1
*)&lumv3
;
1536 struct lov_user_md_v1 __user
*lumv1p
= (void __user
*)arg
;
1537 struct lov_user_md_v3 __user
*lumv3p
= (void __user
*)arg
;
1539 __u64 flags
= FMODE_WRITE
;
1541 /* first try with v1 which is smaller than v3 */
1542 lum_size
= sizeof(struct lov_user_md_v1
);
1543 if (copy_from_user(lumv1
, lumv1p
, lum_size
))
1546 if (lumv1
->lmm_magic
== LOV_USER_MAGIC_V3
) {
1547 lum_size
= sizeof(struct lov_user_md_v3
);
1548 if (copy_from_user(&lumv3
, lumv3p
, lum_size
))
1552 rc
= ll_lov_setstripe_ea_info(inode
, file
->f_path
.dentry
, flags
, lumv1
,
1554 cl_lov_delay_create_clear(&file
->f_flags
);
1556 struct lov_stripe_md
*lsm
;
1559 put_user(0, &lumv1p
->lmm_stripe_count
);
1561 ll_layout_refresh(inode
, &gen
);
1562 lsm
= ccc_inode_lsm_get(inode
);
1563 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
),
1564 0, lsm
, (void __user
*)arg
);
1565 ccc_inode_lsm_put(inode
, lsm
);
1570 static int ll_lov_getstripe(struct inode
*inode
, unsigned long arg
)
1572 struct lov_stripe_md
*lsm
;
1575 lsm
= ccc_inode_lsm_get(inode
);
1577 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
), 0,
1578 lsm
, (void __user
*)arg
);
1579 ccc_inode_lsm_put(inode
, lsm
);
1584 ll_get_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1586 struct ll_inode_info
*lli
= ll_i2info(inode
);
1587 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1588 struct ll_grouplock grouplock
;
1592 CWARN("group id for group lock must not be 0\n");
1596 if (ll_file_nolock(file
))
1599 spin_lock(&lli
->lli_lock
);
1600 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1601 CWARN("group lock already existed with gid %lu\n",
1602 fd
->fd_grouplock
.lg_gid
);
1603 spin_unlock(&lli
->lli_lock
);
1606 LASSERT(!fd
->fd_grouplock
.lg_lock
);
1607 spin_unlock(&lli
->lli_lock
);
1609 rc
= cl_get_grouplock(ll_i2info(inode
)->lli_clob
,
1610 arg
, (file
->f_flags
& O_NONBLOCK
), &grouplock
);
1614 spin_lock(&lli
->lli_lock
);
1615 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1616 spin_unlock(&lli
->lli_lock
);
1617 CERROR("another thread just won the race\n");
1618 cl_put_grouplock(&grouplock
);
1622 fd
->fd_flags
|= LL_FILE_GROUP_LOCKED
;
1623 fd
->fd_grouplock
= grouplock
;
1624 spin_unlock(&lli
->lli_lock
);
1626 CDEBUG(D_INFO
, "group lock %lu obtained\n", arg
);
1630 static int ll_put_grouplock(struct inode
*inode
, struct file
*file
,
1633 struct ll_inode_info
*lli
= ll_i2info(inode
);
1634 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1635 struct ll_grouplock grouplock
;
1637 spin_lock(&lli
->lli_lock
);
1638 if (!(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1639 spin_unlock(&lli
->lli_lock
);
1640 CWARN("no group lock held\n");
1643 LASSERT(fd
->fd_grouplock
.lg_lock
);
1645 if (fd
->fd_grouplock
.lg_gid
!= arg
) {
1646 CWARN("group lock %lu doesn't match current id %lu\n",
1647 arg
, fd
->fd_grouplock
.lg_gid
);
1648 spin_unlock(&lli
->lli_lock
);
1652 grouplock
= fd
->fd_grouplock
;
1653 memset(&fd
->fd_grouplock
, 0, sizeof(fd
->fd_grouplock
));
1654 fd
->fd_flags
&= ~LL_FILE_GROUP_LOCKED
;
1655 spin_unlock(&lli
->lli_lock
);
1657 cl_put_grouplock(&grouplock
);
1658 CDEBUG(D_INFO
, "group lock %lu released\n", arg
);
1663 * Close inode open handle
1665 * \param inode [in] inode in question
1666 * \param it [in,out] intent which contains open info and result
1669 * \retval <0 failure
1671 int ll_release_openhandle(struct inode
*inode
, struct lookup_intent
*it
)
1673 struct obd_client_handle
*och
;
1678 /* Root ? Do nothing. */
1679 if (is_root_inode(inode
))
1682 /* No open handle to close? Move away */
1683 if (!it_disposition(it
, DISP_OPEN_OPEN
))
1686 LASSERT(it_open_error(DISP_OPEN_OPEN
, it
) == 0);
1688 och
= kzalloc(sizeof(*och
), GFP_NOFS
);
1694 ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
1696 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
1699 /* this one is in place of ll_file_open */
1700 if (it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
1701 ptlrpc_req_finished(it
->it_request
);
1702 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
1708 * Get size for inode for which FIEMAP mapping is requested.
1709 * Make the FIEMAP get_info call and returns the result.
1711 static int ll_do_fiemap(struct inode
*inode
, struct ll_user_fiemap
*fiemap
,
1714 struct obd_export
*exp
= ll_i2dtexp(inode
);
1715 struct lov_stripe_md
*lsm
= NULL
;
1716 struct ll_fiemap_info_key fm_key
= { .name
= KEY_FIEMAP
, };
1717 __u32 vallen
= num_bytes
;
1720 /* Checks for fiemap flags */
1721 if (fiemap
->fm_flags
& ~LUSTRE_FIEMAP_FLAGS_COMPAT
) {
1722 fiemap
->fm_flags
&= ~LUSTRE_FIEMAP_FLAGS_COMPAT
;
1726 /* Check for FIEMAP_FLAG_SYNC */
1727 if (fiemap
->fm_flags
& FIEMAP_FLAG_SYNC
) {
1728 rc
= filemap_fdatawrite(inode
->i_mapping
);
1733 lsm
= ccc_inode_lsm_get(inode
);
1737 /* If the stripe_count > 1 and the application does not understand
1738 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1740 if (lsm
->lsm_stripe_count
> 1 &&
1741 !(fiemap
->fm_flags
& FIEMAP_FLAG_DEVICE_ORDER
)) {
1746 fm_key
.oa
.o_oi
= lsm
->lsm_oi
;
1747 fm_key
.oa
.o_valid
= OBD_MD_FLID
| OBD_MD_FLGROUP
;
1749 if (i_size_read(inode
) == 0) {
1750 rc
= ll_glimpse_size(inode
);
1755 obdo_from_inode(&fm_key
.oa
, inode
, OBD_MD_FLSIZE
);
1756 obdo_set_parent_fid(&fm_key
.oa
, &ll_i2info(inode
)->lli_fid
);
1757 /* If filesize is 0, then there would be no objects for mapping */
1758 if (fm_key
.oa
.o_size
== 0) {
1759 fiemap
->fm_mapped_extents
= 0;
1764 memcpy(&fm_key
.fiemap
, fiemap
, sizeof(*fiemap
));
1766 rc
= obd_get_info(NULL
, exp
, sizeof(fm_key
), &fm_key
, &vallen
,
1769 CERROR("obd_get_info failed: rc = %d\n", rc
);
1772 ccc_inode_lsm_put(inode
, lsm
);
1776 int ll_fid2path(struct inode
*inode
, void __user
*arg
)
1778 struct obd_export
*exp
= ll_i2mdexp(inode
);
1779 const struct getinfo_fid2path __user
*gfin
= arg
;
1780 struct getinfo_fid2path
*gfout
;
1785 if (!capable(CFS_CAP_DAC_READ_SEARCH
) &&
1786 !(ll_i2sbi(inode
)->ll_flags
& LL_SBI_USER_FID2PATH
))
1789 /* Only need to get the buflen */
1790 if (get_user(pathlen
, &gfin
->gf_pathlen
))
1793 if (pathlen
> PATH_MAX
)
1796 outsize
= sizeof(*gfout
) + pathlen
;
1798 gfout
= kzalloc(outsize
, GFP_NOFS
);
1802 if (copy_from_user(gfout
, arg
, sizeof(*gfout
))) {
1807 /* Call mdc_iocontrol */
1808 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, exp
, outsize
, gfout
, NULL
);
1812 if (copy_to_user(arg
, gfout
, outsize
))
1820 static int ll_ioctl_fiemap(struct inode
*inode
, unsigned long arg
)
1822 struct ll_user_fiemap
*fiemap_s
;
1823 size_t num_bytes
, ret_bytes
;
1824 unsigned int extent_count
;
1827 /* Get the extent count so we can calculate the size of
1828 * required fiemap buffer
1830 if (get_user(extent_count
,
1831 &((struct ll_user_fiemap __user
*)arg
)->fm_extent_count
))
1835 (SIZE_MAX
- sizeof(*fiemap_s
)) / sizeof(struct ll_fiemap_extent
))
1837 num_bytes
= sizeof(*fiemap_s
) + (extent_count
*
1838 sizeof(struct ll_fiemap_extent
));
1840 fiemap_s
= libcfs_kvzalloc(num_bytes
, GFP_NOFS
);
1844 /* get the fiemap value */
1845 if (copy_from_user(fiemap_s
, (struct ll_user_fiemap __user
*)arg
,
1846 sizeof(*fiemap_s
))) {
1851 /* If fm_extent_count is non-zero, read the first extent since
1852 * it is used to calculate end_offset and device from previous
1856 if (copy_from_user(&fiemap_s
->fm_extents
[0],
1857 (char __user
*)arg
+ sizeof(*fiemap_s
),
1858 sizeof(struct ll_fiemap_extent
))) {
1864 rc
= ll_do_fiemap(inode
, fiemap_s
, num_bytes
);
1868 ret_bytes
= sizeof(struct ll_user_fiemap
);
1870 if (extent_count
!= 0)
1871 ret_bytes
+= (fiemap_s
->fm_mapped_extents
*
1872 sizeof(struct ll_fiemap_extent
));
1874 if (copy_to_user((void __user
*)arg
, fiemap_s
, ret_bytes
))
1883 * Read the data_version for inode.
1885 * This value is computed using stripe object version on OST.
1886 * Version is computed using server side locking.
1888 * @param sync if do sync on the OST side;
1890 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1891 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1893 int ll_data_version(struct inode
*inode
, __u64
*data_version
, int flags
)
1895 struct lov_stripe_md
*lsm
= NULL
;
1896 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1897 struct obdo
*obdo
= NULL
;
1900 /* If no stripe, we consider version is 0. */
1901 lsm
= ccc_inode_lsm_get(inode
);
1902 if (!lsm_has_objects(lsm
)) {
1904 CDEBUG(D_INODE
, "No object for inode\n");
1909 obdo
= kzalloc(sizeof(*obdo
), GFP_NOFS
);
1915 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, obdo
, 0, flags
);
1917 if (!(obdo
->o_valid
& OBD_MD_FLDATAVERSION
))
1920 *data_version
= obdo
->o_data_version
;
1925 ccc_inode_lsm_put(inode
, lsm
);
1930 * Trigger a HSM release request for the provided inode.
1932 int ll_hsm_release(struct inode
*inode
)
1934 struct cl_env_nest nest
;
1936 struct obd_client_handle
*och
= NULL
;
1937 __u64 data_version
= 0;
1940 CDEBUG(D_INODE
, "%s: Releasing file "DFID
".\n",
1941 ll_get_fsname(inode
->i_sb
, NULL
, 0),
1942 PFID(&ll_i2info(inode
)->lli_fid
));
1944 och
= ll_lease_open(inode
, NULL
, FMODE_WRITE
, MDS_OPEN_RELEASE
);
1950 /* Grab latest data_version and [am]time values */
1951 rc
= ll_data_version(inode
, &data_version
, LL_DV_WR_FLUSH
);
1955 env
= cl_env_nested_get(&nest
);
1961 ll_merge_attr(env
, inode
);
1962 cl_env_nested_put(&nest
, env
);
1964 /* Release the file.
1965 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1966 * we still need it to pack l_remote_handle to MDT.
1968 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
1973 if (och
&& !IS_ERR(och
)) /* close the file */
1974 ll_lease_close(och
, inode
, NULL
);
1979 struct ll_swap_stack
{
1980 struct iattr ia1
, ia2
;
1982 struct inode
*inode1
, *inode2
;
1983 bool check_dv1
, check_dv2
;
1986 static int ll_swap_layouts(struct file
*file1
, struct file
*file2
,
1987 struct lustre_swap_layouts
*lsl
)
1989 struct mdc_swap_layouts msl
;
1990 struct md_op_data
*op_data
;
1993 struct ll_swap_stack
*llss
= NULL
;
1996 llss
= kzalloc(sizeof(*llss
), GFP_NOFS
);
2000 llss
->inode1
= file_inode(file1
);
2001 llss
->inode2
= file_inode(file2
);
2003 if (!S_ISREG(llss
->inode2
->i_mode
)) {
2008 if (inode_permission(llss
->inode1
, MAY_WRITE
) ||
2009 inode_permission(llss
->inode2
, MAY_WRITE
)) {
2014 if (llss
->inode2
->i_sb
!= llss
->inode1
->i_sb
) {
2019 /* we use 2 bool because it is easier to swap than 2 bits */
2020 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV1
)
2021 llss
->check_dv1
= true;
2023 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV2
)
2024 llss
->check_dv2
= true;
2026 /* we cannot use lsl->sl_dvX directly because we may swap them */
2027 llss
->dv1
= lsl
->sl_dv1
;
2028 llss
->dv2
= lsl
->sl_dv2
;
2030 rc
= lu_fid_cmp(ll_inode2fid(llss
->inode1
), ll_inode2fid(llss
->inode2
));
2031 if (rc
== 0) /* same file, done! */ {
2036 if (rc
< 0) { /* sequentialize it */
2037 swap(llss
->inode1
, llss
->inode2
);
2039 swap(llss
->dv1
, llss
->dv2
);
2040 swap(llss
->check_dv1
, llss
->check_dv2
);
2044 if (gid
!= 0) { /* application asks to flush dirty cache */
2045 rc
= ll_get_grouplock(llss
->inode1
, file1
, gid
);
2049 rc
= ll_get_grouplock(llss
->inode2
, file2
, gid
);
2051 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2056 /* to be able to restore mtime and atime after swap
2057 * we need to first save them
2060 (SWAP_LAYOUTS_KEEP_MTIME
| SWAP_LAYOUTS_KEEP_ATIME
)) {
2061 llss
->ia1
.ia_mtime
= llss
->inode1
->i_mtime
;
2062 llss
->ia1
.ia_atime
= llss
->inode1
->i_atime
;
2063 llss
->ia1
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2064 llss
->ia2
.ia_mtime
= llss
->inode2
->i_mtime
;
2065 llss
->ia2
.ia_atime
= llss
->inode2
->i_atime
;
2066 llss
->ia2
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2069 /* ultimate check, before swapping the layouts we check if
2070 * dataversion has changed (if requested)
2072 if (llss
->check_dv1
) {
2073 rc
= ll_data_version(llss
->inode1
, &dv
, 0);
2076 if (dv
!= llss
->dv1
) {
2082 if (llss
->check_dv2
) {
2083 rc
= ll_data_version(llss
->inode2
, &dv
, 0);
2086 if (dv
!= llss
->dv2
) {
2092 /* struct md_op_data is used to send the swap args to the mdt
2093 * only flags is missing, so we use struct mdc_swap_layouts
2094 * through the md_op_data->op_data
2096 /* flags from user space have to be converted before they are send to
2097 * server, no flag is sent today, they are only used on the client
2101 op_data
= ll_prep_md_op_data(NULL
, llss
->inode1
, llss
->inode2
, NULL
, 0,
2102 0, LUSTRE_OPC_ANY
, &msl
);
2103 if (IS_ERR(op_data
)) {
2104 rc
= PTR_ERR(op_data
);
2108 rc
= obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS
, ll_i2mdexp(llss
->inode1
),
2109 sizeof(*op_data
), op_data
, NULL
);
2110 ll_finish_md_op_data(op_data
);
2114 ll_put_grouplock(llss
->inode2
, file2
, gid
);
2115 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2118 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2122 /* clear useless flags */
2123 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_MTIME
)) {
2124 llss
->ia1
.ia_valid
&= ~ATTR_MTIME
;
2125 llss
->ia2
.ia_valid
&= ~ATTR_MTIME
;
2128 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_ATIME
)) {
2129 llss
->ia1
.ia_valid
&= ~ATTR_ATIME
;
2130 llss
->ia2
.ia_valid
&= ~ATTR_ATIME
;
2133 /* update time if requested */
2135 if (llss
->ia2
.ia_valid
!= 0) {
2136 inode_lock(llss
->inode1
);
2137 rc
= ll_setattr(file1
->f_path
.dentry
, &llss
->ia2
);
2138 inode_unlock(llss
->inode1
);
2141 if (llss
->ia1
.ia_valid
!= 0) {
2144 inode_lock(llss
->inode2
);
2145 rc1
= ll_setattr(file2
->f_path
.dentry
, &llss
->ia1
);
2146 inode_unlock(llss
->inode2
);
2157 static int ll_hsm_state_set(struct inode
*inode
, struct hsm_state_set
*hss
)
2159 struct md_op_data
*op_data
;
2162 /* Detect out-of range masks */
2163 if ((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_FLAGS_MASK
)
2166 /* Non-root users are forbidden to set or clear flags which are
2167 * NOT defined in HSM_USER_MASK.
2169 if (((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_USER_MASK
) &&
2170 !capable(CFS_CAP_SYS_ADMIN
))
2173 /* Detect out-of range archive id */
2174 if ((hss
->hss_valid
& HSS_ARCHIVE_ID
) &&
2175 (hss
->hss_archive_id
> LL_HSM_MAX_ARCHIVE
))
2178 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2179 LUSTRE_OPC_ANY
, hss
);
2180 if (IS_ERR(op_data
))
2181 return PTR_ERR(op_data
);
2183 rc
= obd_iocontrol(LL_IOC_HSM_STATE_SET
, ll_i2mdexp(inode
),
2184 sizeof(*op_data
), op_data
, NULL
);
2186 ll_finish_md_op_data(op_data
);
2191 static int ll_hsm_import(struct inode
*inode
, struct file
*file
,
2192 struct hsm_user_import
*hui
)
2194 struct hsm_state_set
*hss
= NULL
;
2195 struct iattr
*attr
= NULL
;
2198 if (!S_ISREG(inode
->i_mode
))
2202 hss
= kzalloc(sizeof(*hss
), GFP_NOFS
);
2206 hss
->hss_valid
= HSS_SETMASK
| HSS_ARCHIVE_ID
;
2207 hss
->hss_archive_id
= hui
->hui_archive_id
;
2208 hss
->hss_setmask
= HS_ARCHIVED
| HS_EXISTS
| HS_RELEASED
;
2209 rc
= ll_hsm_state_set(inode
, hss
);
2213 attr
= kzalloc(sizeof(*attr
), GFP_NOFS
);
2219 attr
->ia_mode
= hui
->hui_mode
& (S_IRWXU
| S_IRWXG
| S_IRWXO
);
2220 attr
->ia_mode
|= S_IFREG
;
2221 attr
->ia_uid
= make_kuid(&init_user_ns
, hui
->hui_uid
);
2222 attr
->ia_gid
= make_kgid(&init_user_ns
, hui
->hui_gid
);
2223 attr
->ia_size
= hui
->hui_size
;
2224 attr
->ia_mtime
.tv_sec
= hui
->hui_mtime
;
2225 attr
->ia_mtime
.tv_nsec
= hui
->hui_mtime_ns
;
2226 attr
->ia_atime
.tv_sec
= hui
->hui_atime
;
2227 attr
->ia_atime
.tv_nsec
= hui
->hui_atime_ns
;
2229 attr
->ia_valid
= ATTR_SIZE
| ATTR_MODE
| ATTR_FORCE
|
2230 ATTR_UID
| ATTR_GID
|
2231 ATTR_MTIME
| ATTR_MTIME_SET
|
2232 ATTR_ATIME
| ATTR_ATIME_SET
;
2236 rc
= ll_setattr_raw(file
->f_path
.dentry
, attr
, true);
2240 inode_unlock(inode
);
2249 ll_file_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
2251 struct inode
*inode
= file_inode(file
);
2252 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2255 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p),cmd=%x\n",
2256 PFID(ll_inode2fid(inode
)), inode
, cmd
);
2257 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_IOCTL
, 1);
2259 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2260 if (_IOC_TYPE(cmd
) == 'T' || _IOC_TYPE(cmd
) == 't') /* tty ioctls */
2264 case LL_IOC_GETFLAGS
:
2265 /* Get the current value of the file flags */
2266 return put_user(fd
->fd_flags
, (int __user
*)arg
);
2267 case LL_IOC_SETFLAGS
:
2268 case LL_IOC_CLRFLAGS
:
2269 /* Set or clear specific file flags */
2270 /* XXX This probably needs checks to ensure the flags are
2271 * not abused, and to handle any flag side effects.
2273 if (get_user(flags
, (int __user
*)arg
))
2276 if (cmd
== LL_IOC_SETFLAGS
) {
2277 if ((flags
& LL_FILE_IGNORE_LOCK
) &&
2278 !(file
->f_flags
& O_DIRECT
)) {
2279 CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
2284 fd
->fd_flags
|= flags
;
2286 fd
->fd_flags
&= ~flags
;
2289 case LL_IOC_LOV_SETSTRIPE
:
2290 return ll_lov_setstripe(inode
, file
, arg
);
2291 case LL_IOC_LOV_SETEA
:
2292 return ll_lov_setea(inode
, file
, arg
);
2293 case LL_IOC_LOV_SWAP_LAYOUTS
: {
2295 struct lustre_swap_layouts lsl
;
2297 if (copy_from_user(&lsl
, (char __user
*)arg
,
2298 sizeof(struct lustre_swap_layouts
)))
2301 if ((file
->f_flags
& O_ACCMODE
) == 0) /* O_RDONLY */
2304 file2
= fget(lsl
.sl_fd
);
2309 if ((file2
->f_flags
& O_ACCMODE
) != 0) /* O_WRONLY or O_RDWR */
2310 rc
= ll_swap_layouts(file
, file2
, &lsl
);
2314 case LL_IOC_LOV_GETSTRIPE
:
2315 return ll_lov_getstripe(inode
, arg
);
2316 case LL_IOC_RECREATE_OBJ
:
2317 return ll_lov_recreate_obj(inode
, arg
);
2318 case LL_IOC_RECREATE_FID
:
2319 return ll_lov_recreate_fid(inode
, arg
);
2320 case FSFILT_IOC_FIEMAP
:
2321 return ll_ioctl_fiemap(inode
, arg
);
2322 case FSFILT_IOC_GETFLAGS
:
2323 case FSFILT_IOC_SETFLAGS
:
2324 return ll_iocontrol(inode
, file
, cmd
, arg
);
2325 case FSFILT_IOC_GETVERSION_OLD
:
2326 case FSFILT_IOC_GETVERSION
:
2327 return put_user(inode
->i_generation
, (int __user
*)arg
);
2328 case LL_IOC_GROUP_LOCK
:
2329 return ll_get_grouplock(inode
, file
, arg
);
2330 case LL_IOC_GROUP_UNLOCK
:
2331 return ll_put_grouplock(inode
, file
, arg
);
2332 case IOC_OBD_STATFS
:
2333 return ll_obd_statfs(inode
, (void __user
*)arg
);
2335 /* We need to special case any other ioctls we want to handle,
2336 * to send them to the MDS/OST as appropriate and to properly
2337 * network encode the arg field.
2338 case FSFILT_IOC_SETVERSION_OLD:
2339 case FSFILT_IOC_SETVERSION:
2341 case LL_IOC_FLUSHCTX
:
2342 return ll_flush_ctx(inode
);
2343 case LL_IOC_PATH2FID
: {
2344 if (copy_to_user((void __user
*)arg
, ll_inode2fid(inode
),
2345 sizeof(struct lu_fid
)))
2350 case OBD_IOC_FID2PATH
:
2351 return ll_fid2path(inode
, (void __user
*)arg
);
2352 case LL_IOC_DATA_VERSION
: {
2353 struct ioc_data_version idv
;
2356 if (copy_from_user(&idv
, (char __user
*)arg
, sizeof(idv
)))
2359 idv
.idv_flags
&= LL_DV_RD_FLUSH
| LL_DV_WR_FLUSH
;
2360 rc
= ll_data_version(inode
, &idv
.idv_version
, idv
.idv_flags
);
2361 if (rc
== 0 && copy_to_user((char __user
*)arg
, &idv
,
2368 case LL_IOC_GET_MDTIDX
: {
2371 mdtidx
= ll_get_mdt_idx(inode
);
2375 if (put_user(mdtidx
, (int __user
*)arg
))
2380 case OBD_IOC_GETDTNAME
:
2381 case OBD_IOC_GETMDNAME
:
2382 return ll_get_obd_name(inode
, cmd
, arg
);
2383 case LL_IOC_HSM_STATE_GET
: {
2384 struct md_op_data
*op_data
;
2385 struct hsm_user_state
*hus
;
2388 hus
= kzalloc(sizeof(*hus
), GFP_NOFS
);
2392 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2393 LUSTRE_OPC_ANY
, hus
);
2394 if (IS_ERR(op_data
)) {
2396 return PTR_ERR(op_data
);
2399 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2402 if (copy_to_user((void __user
*)arg
, hus
, sizeof(*hus
)))
2405 ll_finish_md_op_data(op_data
);
2409 case LL_IOC_HSM_STATE_SET
: {
2410 struct hsm_state_set
*hss
;
2413 hss
= memdup_user((char __user
*)arg
, sizeof(*hss
));
2415 return PTR_ERR(hss
);
2417 rc
= ll_hsm_state_set(inode
, hss
);
2422 case LL_IOC_HSM_ACTION
: {
2423 struct md_op_data
*op_data
;
2424 struct hsm_current_action
*hca
;
2427 hca
= kzalloc(sizeof(*hca
), GFP_NOFS
);
2431 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2432 LUSTRE_OPC_ANY
, hca
);
2433 if (IS_ERR(op_data
)) {
2435 return PTR_ERR(op_data
);
2438 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2441 if (copy_to_user((char __user
*)arg
, hca
, sizeof(*hca
)))
2444 ll_finish_md_op_data(op_data
);
2448 case LL_IOC_SET_LEASE
: {
2449 struct ll_inode_info
*lli
= ll_i2info(inode
);
2450 struct obd_client_handle
*och
= NULL
;
2456 if (!(file
->f_mode
& FMODE_WRITE
))
2461 if (!(file
->f_mode
& FMODE_READ
))
2466 mutex_lock(&lli
->lli_och_mutex
);
2467 if (fd
->fd_lease_och
) {
2468 och
= fd
->fd_lease_och
;
2469 fd
->fd_lease_och
= NULL
;
2471 mutex_unlock(&lli
->lli_och_mutex
);
2474 mode
= och
->och_flags
&
2475 (FMODE_READ
| FMODE_WRITE
);
2476 rc
= ll_lease_close(och
, inode
, &lease_broken
);
2477 if (rc
== 0 && lease_broken
)
2483 /* return the type of lease or error */
2484 return rc
< 0 ? rc
: (int)mode
;
2489 CDEBUG(D_INODE
, "Set lease with mode %d\n", mode
);
2491 /* apply for lease */
2492 och
= ll_lease_open(inode
, file
, mode
, 0);
2494 return PTR_ERR(och
);
2497 mutex_lock(&lli
->lli_och_mutex
);
2498 if (!fd
->fd_lease_och
) {
2499 fd
->fd_lease_och
= och
;
2502 mutex_unlock(&lli
->lli_och_mutex
);
2504 /* impossible now that only excl is supported for now */
2505 ll_lease_close(och
, inode
, &lease_broken
);
2510 case LL_IOC_GET_LEASE
: {
2511 struct ll_inode_info
*lli
= ll_i2info(inode
);
2512 struct ldlm_lock
*lock
= NULL
;
2515 mutex_lock(&lli
->lli_och_mutex
);
2516 if (fd
->fd_lease_och
) {
2517 struct obd_client_handle
*och
= fd
->fd_lease_och
;
2519 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
2521 lock_res_and_lock(lock
);
2522 if (!ldlm_is_cancel(lock
))
2523 rc
= och
->och_flags
&
2524 (FMODE_READ
| FMODE_WRITE
);
2525 unlock_res_and_lock(lock
);
2526 LDLM_LOCK_PUT(lock
);
2529 mutex_unlock(&lli
->lli_och_mutex
);
2532 case LL_IOC_HSM_IMPORT
: {
2533 struct hsm_user_import
*hui
;
2535 hui
= memdup_user((void __user
*)arg
, sizeof(*hui
));
2537 return PTR_ERR(hui
);
2539 rc
= ll_hsm_import(inode
, file
, hui
);
2547 if (ll_iocontrol_call(inode
, file
, cmd
, arg
, &err
) ==
2551 return obd_iocontrol(cmd
, ll_i2dtexp(inode
), 0, NULL
,
2552 (void __user
*)arg
);
2557 static loff_t
ll_file_seek(struct file
*file
, loff_t offset
, int origin
)
2559 struct inode
*inode
= file_inode(file
);
2560 loff_t retval
, eof
= 0;
2562 retval
= offset
+ ((origin
== SEEK_END
) ? i_size_read(inode
) :
2563 (origin
== SEEK_CUR
) ? file
->f_pos
: 0);
2564 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), to=%llu=%#llx(%d)\n",
2565 PFID(ll_inode2fid(inode
)), inode
, retval
, retval
, origin
);
2566 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_LLSEEK
, 1);
2568 if (origin
== SEEK_END
|| origin
== SEEK_HOLE
|| origin
== SEEK_DATA
) {
2569 retval
= ll_glimpse_size(inode
);
2572 eof
= i_size_read(inode
);
2575 retval
= generic_file_llseek_size(file
, offset
, origin
,
2576 ll_file_maxbytes(inode
), eof
);
2580 static int ll_flush(struct file
*file
, fl_owner_t id
)
2582 struct inode
*inode
= file_inode(file
);
2583 struct ll_inode_info
*lli
= ll_i2info(inode
);
2584 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2587 LASSERT(!S_ISDIR(inode
->i_mode
));
2589 /* catch async errors that were recorded back when async writeback
2590 * failed for pages in this mapping.
2592 rc
= lli
->lli_async_rc
;
2593 lli
->lli_async_rc
= 0;
2594 if (lli
->lli_clob
) {
2595 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2600 /* The application has been told about write failure already.
2601 * Do not report failure again.
2603 if (fd
->fd_write_failed
)
2605 return rc
? -EIO
: 0;
2609 * Called to make sure a portion of file has been written out.
2610 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2612 * Return how many pages have been written.
2614 int cl_sync_file_range(struct inode
*inode
, loff_t start
, loff_t end
,
2615 enum cl_fsync_mode mode
, int ignore_layout
)
2617 struct cl_env_nest nest
;
2620 struct cl_fsync_io
*fio
;
2623 if (mode
!= CL_FSYNC_NONE
&& mode
!= CL_FSYNC_LOCAL
&&
2624 mode
!= CL_FSYNC_DISCARD
&& mode
!= CL_FSYNC_ALL
)
2627 env
= cl_env_nested_get(&nest
);
2629 return PTR_ERR(env
);
2631 io
= vvp_env_thread_io(env
);
2632 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
2633 io
->ci_ignore_layout
= ignore_layout
;
2635 /* initialize parameters for sync */
2636 fio
= &io
->u
.ci_fsync
;
2637 fio
->fi_start
= start
;
2639 fio
->fi_fid
= ll_inode2fid(inode
);
2640 fio
->fi_mode
= mode
;
2641 fio
->fi_nr_written
= 0;
2643 if (cl_io_init(env
, io
, CIT_FSYNC
, io
->ci_obj
) == 0)
2644 result
= cl_io_loop(env
, io
);
2646 result
= io
->ci_result
;
2648 result
= fio
->fi_nr_written
;
2649 cl_io_fini(env
, io
);
2650 cl_env_nested_put(&nest
, env
);
2655 int ll_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
2657 struct inode
*inode
= file_inode(file
);
2658 struct ll_inode_info
*lli
= ll_i2info(inode
);
2659 struct ptlrpc_request
*req
;
2662 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p)\n",
2663 PFID(ll_inode2fid(inode
)), inode
);
2664 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FSYNC
, 1);
2666 rc
= filemap_write_and_wait_range(inode
->i_mapping
, start
, end
);
2669 /* catch async errors that were recorded back when async writeback
2670 * failed for pages in this mapping.
2672 if (!S_ISDIR(inode
->i_mode
)) {
2673 err
= lli
->lli_async_rc
;
2674 lli
->lli_async_rc
= 0;
2677 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2682 err
= md_sync(ll_i2sbi(inode
)->ll_md_exp
, ll_inode2fid(inode
), &req
);
2686 ptlrpc_req_finished(req
);
2688 if (S_ISREG(inode
->i_mode
)) {
2689 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2691 err
= cl_sync_file_range(inode
, start
, end
, CL_FSYNC_ALL
, 0);
2692 if (rc
== 0 && err
< 0)
2695 fd
->fd_write_failed
= true;
2697 fd
->fd_write_failed
= false;
2700 inode_unlock(inode
);
2705 ll_file_flock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2707 struct inode
*inode
= file_inode(file
);
2708 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2709 struct ldlm_enqueue_info einfo
= {
2710 .ei_type
= LDLM_FLOCK
,
2711 .ei_cb_cp
= ldlm_flock_completion_ast
,
2712 .ei_cbdata
= file_lock
,
2714 struct md_op_data
*op_data
;
2715 struct lustre_handle lockh
= {0};
2716 ldlm_policy_data_t flock
= { {0} };
2717 int fl_type
= file_lock
->fl_type
;
2722 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
" file_lock=%p\n",
2723 PFID(ll_inode2fid(inode
)), file_lock
);
2725 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FLOCK
, 1);
2727 if (file_lock
->fl_flags
& FL_FLOCK
)
2728 LASSERT((cmd
== F_SETLKW
) || (cmd
== F_SETLK
));
2729 else if (!(file_lock
->fl_flags
& FL_POSIX
))
2732 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_owner
;
2733 flock
.l_flock
.pid
= file_lock
->fl_pid
;
2734 flock
.l_flock
.start
= file_lock
->fl_start
;
2735 flock
.l_flock
.end
= file_lock
->fl_end
;
2737 /* Somewhat ugly workaround for svc lockd.
2738 * lockd installs custom fl_lmops->lm_compare_owner that checks
2739 * for the fl_owner to be the same (which it always is on local node
2740 * I guess between lockd processes) and then compares pid.
2741 * As such we assign pid to the owner field to make it all work,
2742 * conflict with normal locks is unlikely since pid space and
2743 * pointer space for current->files are not intersecting
2745 if (file_lock
->fl_lmops
&& file_lock
->fl_lmops
->lm_compare_owner
)
2746 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_pid
;
2750 einfo
.ei_mode
= LCK_PR
;
2753 /* An unlock request may or may not have any relation to
2754 * existing locks so we may not be able to pass a lock handle
2755 * via a normal ldlm_lock_cancel() request. The request may even
2756 * unlock a byte range in the middle of an existing lock. In
2757 * order to process an unlock request we need all of the same
2758 * information that is given with a normal read or write record
2759 * lock request. To avoid creating another ldlm unlock (cancel)
2760 * message we'll treat a LCK_NL flock request as an unlock.
2762 einfo
.ei_mode
= LCK_NL
;
2765 einfo
.ei_mode
= LCK_PW
;
2768 CDEBUG(D_INFO
, "Unknown fcntl lock type: %d\n", fl_type
);
2783 flags
= LDLM_FL_BLOCK_NOWAIT
;
2789 flags
= LDLM_FL_TEST_LOCK
;
2792 CERROR("unknown fcntl lock command: %d\n", cmd
);
2797 * Save the old mode so that if the mode in the lock changes we
2798 * can decrement the appropriate reader or writer refcount.
2800 file_lock
->fl_type
= einfo
.ei_mode
;
2802 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2803 LUSTRE_OPC_ANY
, NULL
);
2804 if (IS_ERR(op_data
))
2805 return PTR_ERR(op_data
);
2807 CDEBUG(D_DLMTRACE
, "inode="DFID
", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
2808 PFID(ll_inode2fid(inode
)), flock
.l_flock
.pid
, flags
,
2809 einfo
.ei_mode
, flock
.l_flock
.start
, flock
.l_flock
.end
);
2811 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, &flock
, NULL
, op_data
, &lockh
,
2814 /* Restore the file lock type if not TEST lock. */
2815 if (!(flags
& LDLM_FL_TEST_LOCK
))
2816 file_lock
->fl_type
= fl_type
;
2818 if ((rc
== 0 || file_lock
->fl_type
== F_UNLCK
) &&
2819 !(flags
& LDLM_FL_TEST_LOCK
))
2820 rc2
= locks_lock_file_wait(file
, file_lock
);
2822 if (rc2
&& file_lock
->fl_type
!= F_UNLCK
) {
2823 einfo
.ei_mode
= LCK_NL
;
2824 md_enqueue(sbi
->ll_md_exp
, &einfo
, &flock
, NULL
, op_data
,
2829 ll_finish_md_op_data(op_data
);
2834 int ll_get_fid_by_name(struct inode
*parent
, const char *name
,
2835 int namelen
, struct lu_fid
*fid
)
2837 struct md_op_data
*op_data
= NULL
;
2838 struct ptlrpc_request
*req
;
2839 struct mdt_body
*body
;
2842 op_data
= ll_prep_md_op_data(NULL
, parent
, NULL
, name
, namelen
, 0,
2843 LUSTRE_OPC_ANY
, NULL
);
2844 if (IS_ERR(op_data
))
2845 return PTR_ERR(op_data
);
2847 op_data
->op_valid
= OBD_MD_FLID
;
2848 rc
= md_getattr_name(ll_i2sbi(parent
)->ll_md_exp
, op_data
, &req
);
2849 ll_finish_md_op_data(op_data
);
2853 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
2859 *fid
= body
->mbo_fid1
;
2861 ptlrpc_req_finished(req
);
2865 int ll_migrate(struct inode
*parent
, struct file
*file
, int mdtidx
,
2866 const char *name
, int namelen
)
2868 struct ptlrpc_request
*request
= NULL
;
2869 struct inode
*child_inode
= NULL
;
2870 struct dentry
*dchild
= NULL
;
2871 struct md_op_data
*op_data
;
2875 CDEBUG(D_VFSTRACE
, "migrate %s under "DFID
" to MDT%d\n",
2876 name
, PFID(ll_inode2fid(parent
)), mdtidx
);
2878 op_data
= ll_prep_md_op_data(NULL
, parent
, NULL
, name
, namelen
,
2879 0, LUSTRE_OPC_ANY
, NULL
);
2880 if (IS_ERR(op_data
))
2881 return PTR_ERR(op_data
);
2883 /* Get child FID first */
2884 qstr
.hash
= full_name_hash(parent
, name
, namelen
);
2887 dchild
= d_lookup(file_dentry(file
), &qstr
);
2888 if (dchild
&& dchild
->d_inode
) {
2889 op_data
->op_fid3
= *ll_inode2fid(dchild
->d_inode
);
2890 if (dchild
->d_inode
) {
2891 child_inode
= igrab(dchild
->d_inode
);
2892 ll_invalidate_aliases(child_inode
);
2896 rc
= ll_get_fid_by_name(parent
, name
, namelen
,
2902 if (!fid_is_sane(&op_data
->op_fid3
)) {
2903 CERROR("%s: migrate %s, but fid "DFID
" is insane\n",
2904 ll_get_fsname(parent
->i_sb
, NULL
, 0), name
,
2905 PFID(&op_data
->op_fid3
));
2910 rc
= ll_get_mdt_idx_by_fid(ll_i2sbi(parent
), &op_data
->op_fid3
);
2915 CDEBUG(D_INFO
, "%s:"DFID
" is already on MDT%d.\n", name
,
2916 PFID(&op_data
->op_fid3
), mdtidx
);
2921 op_data
->op_mds
= mdtidx
;
2922 op_data
->op_cli_flags
= CLI_MIGRATE
;
2923 rc
= md_rename(ll_i2sbi(parent
)->ll_md_exp
, op_data
, name
,
2924 namelen
, name
, namelen
, &request
);
2926 ll_update_times(request
, parent
);
2928 ptlrpc_req_finished(request
);
2932 clear_nlink(child_inode
);
2936 ll_finish_md_op_data(op_data
);
2941 ll_file_noflock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2947 * test if some locks matching bits and l_req_mode are acquired
2948 * - bits can be in different locks
2949 * - if found clear the common lock bits in *bits
2950 * - the bits not found, are kept in *bits
2952 * \param bits [IN] searched lock bits [IN]
2953 * \param l_req_mode [IN] searched lock mode
2954 * \retval boolean, true iff all bits are found
2956 int ll_have_md_lock(struct inode
*inode
, __u64
*bits
,
2957 enum ldlm_mode l_req_mode
)
2959 struct lustre_handle lockh
;
2960 ldlm_policy_data_t policy
;
2961 enum ldlm_mode mode
= (l_req_mode
== LCK_MINMODE
) ?
2962 (LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
) : l_req_mode
;
2970 fid
= &ll_i2info(inode
)->lli_fid
;
2971 CDEBUG(D_INFO
, "trying to match res "DFID
" mode %s\n", PFID(fid
),
2972 ldlm_lockname
[mode
]);
2974 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_CBPENDING
| LDLM_FL_TEST_LOCK
;
2975 for (i
= 0; i
<= MDS_INODELOCK_MAXSHIFT
&& *bits
!= 0; i
++) {
2976 policy
.l_inodebits
.bits
= *bits
& (1 << i
);
2977 if (policy
.l_inodebits
.bits
== 0)
2980 if (md_lock_match(ll_i2mdexp(inode
), flags
, fid
, LDLM_IBITS
,
2981 &policy
, mode
, &lockh
)) {
2982 struct ldlm_lock
*lock
;
2984 lock
= ldlm_handle2lock(&lockh
);
2987 ~(lock
->l_policy_data
.l_inodebits
.bits
);
2988 LDLM_LOCK_PUT(lock
);
2990 *bits
&= ~policy
.l_inodebits
.bits
;
2997 enum ldlm_mode
ll_take_md_lock(struct inode
*inode
, __u64 bits
,
2998 struct lustre_handle
*lockh
, __u64 flags
,
2999 enum ldlm_mode mode
)
3001 ldlm_policy_data_t policy
= { .l_inodebits
= {bits
} };
3005 fid
= &ll_i2info(inode
)->lli_fid
;
3006 CDEBUG(D_INFO
, "trying to match res "DFID
"\n", PFID(fid
));
3008 rc
= md_lock_match(ll_i2mdexp(inode
), flags
| LDLM_FL_BLOCK_GRANTED
,
3009 fid
, LDLM_IBITS
, &policy
, mode
, lockh
);
3014 static int ll_inode_revalidate_fini(struct inode
*inode
, int rc
)
3016 /* Already unlinked. Just update nlink and return success */
3017 if (rc
== -ENOENT
) {
3019 /* This path cannot be hit for regular files unless in
3020 * case of obscure races, so no need to validate size.
3022 if (!S_ISREG(inode
->i_mode
) && !S_ISDIR(inode
->i_mode
))
3024 } else if (rc
!= 0) {
3025 CDEBUG_LIMIT((rc
== -EACCES
|| rc
== -EIDRM
) ? D_INFO
: D_ERROR
,
3026 "%s: revalidate FID "DFID
" error: rc = %d\n",
3027 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3028 PFID(ll_inode2fid(inode
)), rc
);
3034 static int __ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
3036 struct inode
*inode
= d_inode(dentry
);
3037 struct ptlrpc_request
*req
= NULL
;
3038 struct obd_export
*exp
;
3041 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p),name=%pd\n",
3042 PFID(ll_inode2fid(inode
)), inode
, dentry
);
3044 exp
= ll_i2mdexp(inode
);
3046 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3047 * But under CMD case, it caused some lock issues, should be fixed
3048 * with new CMD ibits lock. See bug 12718
3050 if (exp_connect_flags(exp
) & OBD_CONNECT_ATTRFID
) {
3051 struct lookup_intent oit
= { .it_op
= IT_GETATTR
};
3052 struct md_op_data
*op_data
;
3054 if (ibits
== MDS_INODELOCK_LOOKUP
)
3055 oit
.it_op
= IT_LOOKUP
;
3057 /* Call getattr by fid, so do not provide name at all. */
3058 op_data
= ll_prep_md_op_data(NULL
, inode
,
3060 LUSTRE_OPC_ANY
, NULL
);
3061 if (IS_ERR(op_data
))
3062 return PTR_ERR(op_data
);
3064 rc
= md_intent_lock(exp
, op_data
, &oit
, &req
,
3065 &ll_md_blocking_ast
, 0);
3066 ll_finish_md_op_data(op_data
);
3068 rc
= ll_inode_revalidate_fini(inode
, rc
);
3072 rc
= ll_revalidate_it_finish(req
, &oit
, inode
);
3074 ll_intent_release(&oit
);
3078 /* Unlinked? Unhash dentry, so it is not picked up later by
3079 * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3080 * here to preserve get_cwd functionality on 2.6.
3083 if (!d_inode(dentry
)->i_nlink
) {
3084 spin_lock(&inode
->i_lock
);
3085 d_lustre_invalidate(dentry
, 0);
3086 spin_unlock(&inode
->i_lock
);
3089 ll_lookup_finish_locks(&oit
, inode
);
3090 } else if (!ll_have_md_lock(d_inode(dentry
), &ibits
, LCK_MINMODE
)) {
3091 struct ll_sb_info
*sbi
= ll_i2sbi(d_inode(dentry
));
3092 u64 valid
= OBD_MD_FLGETATTR
;
3093 struct md_op_data
*op_data
;
3096 if (S_ISREG(inode
->i_mode
)) {
3097 rc
= ll_get_default_mdsize(sbi
, &ealen
);
3100 valid
|= OBD_MD_FLEASIZE
| OBD_MD_FLMODEASIZE
;
3103 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
,
3104 0, ealen
, LUSTRE_OPC_ANY
,
3106 if (IS_ERR(op_data
))
3107 return PTR_ERR(op_data
);
3109 op_data
->op_valid
= valid
;
3110 rc
= md_getattr(sbi
->ll_md_exp
, op_data
, &req
);
3111 ll_finish_md_op_data(op_data
);
3113 rc
= ll_inode_revalidate_fini(inode
, rc
);
3117 rc
= ll_prep_inode(&inode
, req
, NULL
, NULL
);
3120 ptlrpc_req_finished(req
);
3124 static int ll_merge_md_attr(struct inode
*inode
)
3126 struct cl_attr attr
= { 0 };
3129 LASSERT(ll_i2info(inode
)->lli_lsm_md
);
3130 rc
= md_merge_attr(ll_i2mdexp(inode
), ll_i2info(inode
)->lli_lsm_md
,
3135 ll_i2info(inode
)->lli_stripe_dir_size
= attr
.cat_size
;
3136 ll_i2info(inode
)->lli_stripe_dir_nlink
= attr
.cat_nlink
;
3138 ll_i2info(inode
)->lli_atime
= attr
.cat_atime
;
3139 ll_i2info(inode
)->lli_mtime
= attr
.cat_mtime
;
3140 ll_i2info(inode
)->lli_ctime
= attr
.cat_ctime
;
3145 static int ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
3147 struct inode
*inode
= d_inode(dentry
);
3150 rc
= __ll_inode_revalidate(dentry
, ibits
);
3154 /* if object isn't regular file, don't validate size */
3155 if (!S_ISREG(inode
->i_mode
)) {
3156 if (S_ISDIR(inode
->i_mode
) &&
3157 ll_i2info(inode
)->lli_lsm_md
) {
3158 rc
= ll_merge_md_attr(inode
);
3163 LTIME_S(inode
->i_atime
) = ll_i2info(inode
)->lli_atime
;
3164 LTIME_S(inode
->i_mtime
) = ll_i2info(inode
)->lli_mtime
;
3165 LTIME_S(inode
->i_ctime
) = ll_i2info(inode
)->lli_ctime
;
3167 /* In case of restore, the MDT has the right size and has
3168 * already send it back without granting the layout lock,
3169 * inode is up-to-date so glimpse is useless.
3170 * Also to glimpse we need the layout, in case of a running
3171 * restore the MDT holds the layout lock so the glimpse will
3172 * block up to the end of restore (getattr will block)
3174 if (!(ll_i2info(inode
)->lli_flags
& LLIF_FILE_RESTORING
))
3175 rc
= ll_glimpse_size(inode
);
3180 int ll_getattr(struct vfsmount
*mnt
, struct dentry
*de
, struct kstat
*stat
)
3182 struct inode
*inode
= d_inode(de
);
3183 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3184 struct ll_inode_info
*lli
= ll_i2info(inode
);
3187 res
= ll_inode_revalidate(de
, MDS_INODELOCK_UPDATE
|
3188 MDS_INODELOCK_LOOKUP
);
3189 ll_stats_ops_tally(sbi
, LPROC_LL_GETATTR
, 1);
3194 OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY
, 30);
3196 stat
->dev
= inode
->i_sb
->s_dev
;
3197 if (ll_need_32bit_api(sbi
))
3198 stat
->ino
= cl_fid_build_ino(&lli
->lli_fid
, 1);
3200 stat
->ino
= inode
->i_ino
;
3201 stat
->mode
= inode
->i_mode
;
3202 stat
->uid
= inode
->i_uid
;
3203 stat
->gid
= inode
->i_gid
;
3204 stat
->rdev
= inode
->i_rdev
;
3205 stat
->atime
= inode
->i_atime
;
3206 stat
->mtime
= inode
->i_mtime
;
3207 stat
->ctime
= inode
->i_ctime
;
3208 stat
->blksize
= 1 << inode
->i_blkbits
;
3209 stat
->blocks
= inode
->i_blocks
;
3211 if (S_ISDIR(inode
->i_mode
) &&
3212 ll_i2info(inode
)->lli_lsm_md
) {
3213 stat
->nlink
= lli
->lli_stripe_dir_nlink
;
3214 stat
->size
= lli
->lli_stripe_dir_size
;
3216 stat
->nlink
= inode
->i_nlink
;
3217 stat
->size
= i_size_read(inode
);
3223 static int ll_fiemap(struct inode
*inode
, struct fiemap_extent_info
*fieinfo
,
3224 __u64 start
, __u64 len
)
3228 struct ll_user_fiemap
*fiemap
;
3229 unsigned int extent_count
= fieinfo
->fi_extents_max
;
3231 num_bytes
= sizeof(*fiemap
) + (extent_count
*
3232 sizeof(struct ll_fiemap_extent
));
3233 fiemap
= libcfs_kvzalloc(num_bytes
, GFP_NOFS
);
3238 fiemap
->fm_flags
= fieinfo
->fi_flags
;
3239 fiemap
->fm_extent_count
= fieinfo
->fi_extents_max
;
3240 fiemap
->fm_start
= start
;
3241 fiemap
->fm_length
= len
;
3242 if (extent_count
> 0 &&
3243 copy_from_user(&fiemap
->fm_extents
[0], fieinfo
->fi_extents_start
,
3244 sizeof(struct ll_fiemap_extent
)) != 0) {
3249 rc
= ll_do_fiemap(inode
, fiemap
, num_bytes
);
3251 fieinfo
->fi_flags
= fiemap
->fm_flags
;
3252 fieinfo
->fi_extents_mapped
= fiemap
->fm_mapped_extents
;
3253 if (extent_count
> 0 &&
3254 copy_to_user(fieinfo
->fi_extents_start
, &fiemap
->fm_extents
[0],
3255 fiemap
->fm_mapped_extents
*
3256 sizeof(struct ll_fiemap_extent
)) != 0) {
3266 struct posix_acl
*ll_get_acl(struct inode
*inode
, int type
)
3268 struct ll_inode_info
*lli
= ll_i2info(inode
);
3269 struct posix_acl
*acl
= NULL
;
3271 spin_lock(&lli
->lli_lock
);
3272 /* VFS' acl_permission_check->check_acl will release the refcount */
3273 acl
= posix_acl_dup(lli
->lli_posix_acl
);
3274 #ifdef CONFIG_FS_POSIX_ACL
3275 forget_cached_acl(inode
, type
);
3277 spin_unlock(&lli
->lli_lock
);
3282 int ll_inode_permission(struct inode
*inode
, int mask
)
3284 struct ll_sb_info
*sbi
;
3285 struct root_squash_info
*squash
;
3286 const struct cred
*old_cred
= NULL
;
3287 struct cred
*cred
= NULL
;
3288 bool squash_id
= false;
3292 if (mask
& MAY_NOT_BLOCK
)
3295 /* as root inode are NOT getting validated in lookup operation,
3296 * need to do it before permission check.
3299 if (is_root_inode(inode
)) {
3300 rc
= __ll_inode_revalidate(inode
->i_sb
->s_root
,
3301 MDS_INODELOCK_LOOKUP
);
3306 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), inode mode %x mask %o\n",
3307 PFID(ll_inode2fid(inode
)), inode
, inode
->i_mode
, mask
);
3309 /* squash fsuid/fsgid if needed */
3310 sbi
= ll_i2sbi(inode
);
3311 squash
= &sbi
->ll_squash
;
3312 if (unlikely(squash
->rsi_uid
&&
3313 uid_eq(current_fsuid(), GLOBAL_ROOT_UID
) &&
3314 !(sbi
->ll_flags
& LL_SBI_NOROOTSQUASH
))) {
3319 CDEBUG(D_OTHER
, "squash creds (%d:%d)=>(%d:%d)\n",
3320 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3321 squash
->rsi_uid
, squash
->rsi_gid
);
3324 * update current process's credentials
3327 cred
= prepare_creds();
3331 cred
->fsuid
= make_kuid(&init_user_ns
, squash
->rsi_uid
);
3332 cred
->fsgid
= make_kgid(&init_user_ns
, squash
->rsi_gid
);
3333 for (cap
= 0; cap
< sizeof(cfs_cap_t
) * 8; cap
++) {
3334 if ((1 << cap
) & CFS_CAP_FS_MASK
)
3335 cap_lower(cred
->cap_effective
, cap
);
3337 old_cred
= override_creds(cred
);
3340 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_INODE_PERM
, 1);
3341 rc
= generic_permission(inode
, mask
);
3343 /* restore current process's credentials and FS capability */
3345 revert_creds(old_cred
);
3352 /* -o localflock - only provides locally consistent flock locks */
3353 struct file_operations ll_file_operations
= {
3354 .read_iter
= ll_file_read_iter
,
3355 .write_iter
= ll_file_write_iter
,
3356 .unlocked_ioctl
= ll_file_ioctl
,
3357 .open
= ll_file_open
,
3358 .release
= ll_file_release
,
3359 .mmap
= ll_file_mmap
,
3360 .llseek
= ll_file_seek
,
3361 .splice_read
= ll_file_splice_read
,
3366 struct file_operations ll_file_operations_flock
= {
3367 .read_iter
= ll_file_read_iter
,
3368 .write_iter
= ll_file_write_iter
,
3369 .unlocked_ioctl
= ll_file_ioctl
,
3370 .open
= ll_file_open
,
3371 .release
= ll_file_release
,
3372 .mmap
= ll_file_mmap
,
3373 .llseek
= ll_file_seek
,
3374 .splice_read
= ll_file_splice_read
,
3377 .flock
= ll_file_flock
,
3378 .lock
= ll_file_flock
3381 /* These are for -o noflock - to return ENOSYS on flock calls */
3382 struct file_operations ll_file_operations_noflock
= {
3383 .read_iter
= ll_file_read_iter
,
3384 .write_iter
= ll_file_write_iter
,
3385 .unlocked_ioctl
= ll_file_ioctl
,
3386 .open
= ll_file_open
,
3387 .release
= ll_file_release
,
3388 .mmap
= ll_file_mmap
,
3389 .llseek
= ll_file_seek
,
3390 .splice_read
= ll_file_splice_read
,
3393 .flock
= ll_file_noflock
,
3394 .lock
= ll_file_noflock
3397 const struct inode_operations ll_file_inode_operations
= {
3398 .setattr
= ll_setattr
,
3399 .getattr
= ll_getattr
,
3400 .permission
= ll_inode_permission
,
3401 .setxattr
= generic_setxattr
,
3402 .getxattr
= generic_getxattr
,
3403 .listxattr
= ll_listxattr
,
3404 .removexattr
= generic_removexattr
,
3405 .fiemap
= ll_fiemap
,
3406 .get_acl
= ll_get_acl
,
3409 /* dynamic ioctl number support routines */
3410 static struct llioc_ctl_data
{
3411 struct rw_semaphore ioc_sem
;
3412 struct list_head ioc_head
;
3414 __RWSEM_INITIALIZER(llioc
.ioc_sem
),
3415 LIST_HEAD_INIT(llioc
.ioc_head
)
3419 struct list_head iocd_list
;
3420 unsigned int iocd_size
;
3421 llioc_callback_t iocd_cb
;
3422 unsigned int iocd_count
;
3423 unsigned int iocd_cmd
[0];
3426 void *ll_iocontrol_register(llioc_callback_t cb
, int count
, unsigned int *cmd
)
3429 struct llioc_data
*in_data
= NULL
;
3431 if (!cb
|| !cmd
|| count
> LLIOC_MAX_CMD
|| count
< 0)
3434 size
= sizeof(*in_data
) + count
* sizeof(unsigned int);
3435 in_data
= kzalloc(size
, GFP_NOFS
);
3439 in_data
->iocd_size
= size
;
3440 in_data
->iocd_cb
= cb
;
3441 in_data
->iocd_count
= count
;
3442 memcpy(in_data
->iocd_cmd
, cmd
, sizeof(unsigned int) * count
);
3444 down_write(&llioc
.ioc_sem
);
3445 list_add_tail(&in_data
->iocd_list
, &llioc
.ioc_head
);
3446 up_write(&llioc
.ioc_sem
);
3450 EXPORT_SYMBOL(ll_iocontrol_register
);
3452 void ll_iocontrol_unregister(void *magic
)
3454 struct llioc_data
*tmp
;
3459 down_write(&llioc
.ioc_sem
);
3460 list_for_each_entry(tmp
, &llioc
.ioc_head
, iocd_list
) {
3462 list_del(&tmp
->iocd_list
);
3463 up_write(&llioc
.ioc_sem
);
3469 up_write(&llioc
.ioc_sem
);
3471 CWARN("didn't find iocontrol register block with magic: %p\n", magic
);
3473 EXPORT_SYMBOL(ll_iocontrol_unregister
);
3475 static enum llioc_iter
3476 ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
3477 unsigned int cmd
, unsigned long arg
, int *rcp
)
3479 enum llioc_iter ret
= LLIOC_CONT
;
3480 struct llioc_data
*data
;
3481 int rc
= -EINVAL
, i
;
3483 down_read(&llioc
.ioc_sem
);
3484 list_for_each_entry(data
, &llioc
.ioc_head
, iocd_list
) {
3485 for (i
= 0; i
< data
->iocd_count
; i
++) {
3486 if (cmd
!= data
->iocd_cmd
[i
])
3489 ret
= data
->iocd_cb(inode
, file
, cmd
, arg
, data
, &rc
);
3493 if (ret
== LLIOC_STOP
)
3496 up_read(&llioc
.ioc_sem
);
3503 int ll_layout_conf(struct inode
*inode
, const struct cl_object_conf
*conf
)
3505 struct ll_inode_info
*lli
= ll_i2info(inode
);
3506 struct cl_env_nest nest
;
3513 env
= cl_env_nested_get(&nest
);
3515 return PTR_ERR(env
);
3517 result
= cl_conf_set(env
, lli
->lli_clob
, conf
);
3518 cl_env_nested_put(&nest
, env
);
3520 if (conf
->coc_opc
== OBJECT_CONF_SET
) {
3521 struct ldlm_lock
*lock
= conf
->coc_lock
;
3524 LASSERT(ldlm_has_layout(lock
));
3526 /* it can only be allowed to match after layout is
3527 * applied to inode otherwise false layout would be
3528 * seen. Applying layout should happen before dropping
3531 ldlm_lock_allow_match(lock
);
3537 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3538 static int ll_layout_fetch(struct inode
*inode
, struct ldlm_lock
*lock
)
3541 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3542 struct ptlrpc_request
*req
;
3543 struct mdt_body
*body
;
3549 CDEBUG(D_INODE
, DFID
" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3550 PFID(ll_inode2fid(inode
)), ldlm_is_lvb_ready(lock
),
3551 lock
->l_lvb_data
, lock
->l_lvb_len
);
3553 if (lock
->l_lvb_data
&& ldlm_is_lvb_ready(lock
))
3556 /* if layout lock was granted right away, the layout is returned
3557 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3558 * blocked and then granted via completion ast, we have to fetch
3559 * layout here. Please note that we can't use the LVB buffer in
3560 * completion AST because it doesn't have a large enough buffer
3562 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
3564 rc
= md_getxattr(sbi
->ll_md_exp
, ll_inode2fid(inode
),
3565 OBD_MD_FLXATTR
, XATTR_NAME_LOV
, NULL
, 0,
3570 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
3576 lmmsize
= body
->mbo_eadatasize
;
3577 if (lmmsize
== 0) /* empty layout */ {
3582 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EADATA
, lmmsize
);
3588 lvbdata
= libcfs_kvzalloc(lmmsize
, GFP_NOFS
);
3594 memcpy(lvbdata
, lmm
, lmmsize
);
3595 lock_res_and_lock(lock
);
3596 if (lock
->l_lvb_data
)
3597 kvfree(lock
->l_lvb_data
);
3599 lock
->l_lvb_data
= lvbdata
;
3600 lock
->l_lvb_len
= lmmsize
;
3601 unlock_res_and_lock(lock
);
3604 ptlrpc_req_finished(req
);
3609 * Apply the layout to the inode. Layout lock is held and will be released
3612 static int ll_layout_lock_set(struct lustre_handle
*lockh
, enum ldlm_mode mode
,
3613 struct inode
*inode
, __u32
*gen
, bool reconf
)
3615 struct ll_inode_info
*lli
= ll_i2info(inode
);
3616 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3617 struct ldlm_lock
*lock
;
3618 struct lustre_md md
= { NULL
};
3619 struct cl_object_conf conf
;
3622 bool wait_layout
= false;
3624 LASSERT(lustre_handle_is_used(lockh
));
3626 lock
= ldlm_handle2lock(lockh
);
3628 LASSERT(ldlm_has_layout(lock
));
3630 LDLM_DEBUG(lock
, "File "DFID
"(%p) being reconfigured: %d",
3631 PFID(&lli
->lli_fid
), inode
, reconf
);
3633 /* in case this is a caching lock and reinstate with new inode */
3634 md_set_lock_data(sbi
->ll_md_exp
, lockh
, inode
, NULL
);
3636 lock_res_and_lock(lock
);
3637 lvb_ready
= ldlm_is_lvb_ready(lock
);
3638 unlock_res_and_lock(lock
);
3639 /* checking lvb_ready is racy but this is okay. The worst case is
3640 * that multi processes may configure the file on the same time.
3642 if (lvb_ready
|| !reconf
) {
3645 /* layout_gen must be valid if layout lock is not
3646 * cancelled and stripe has already set
3648 *gen
= ll_layout_version_get(lli
);
3654 rc
= ll_layout_fetch(inode
, lock
);
3658 /* for layout lock, lmm is returned in lock's lvb.
3659 * lvb_data is immutable if the lock is held so it's safe to access it
3660 * without res lock. See the description in ldlm_lock_decref_internal()
3661 * for the condition to free lvb_data of layout lock
3663 if (lock
->l_lvb_data
) {
3664 rc
= obd_unpackmd(sbi
->ll_dt_exp
, &md
.lsm
,
3665 lock
->l_lvb_data
, lock
->l_lvb_len
);
3667 *gen
= LL_LAYOUT_GEN_EMPTY
;
3669 *gen
= md
.lsm
->lsm_layout_gen
;
3672 CERROR("%s: file " DFID
" unpackmd error: %d\n",
3673 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3674 PFID(&lli
->lli_fid
), rc
);
3680 /* set layout to file. Unlikely this will fail as old layout was
3683 memset(&conf
, 0, sizeof(conf
));
3684 conf
.coc_opc
= OBJECT_CONF_SET
;
3685 conf
.coc_inode
= inode
;
3686 conf
.coc_lock
= lock
;
3687 conf
.u
.coc_md
= &md
;
3688 rc
= ll_layout_conf(inode
, &conf
);
3691 obd_free_memmd(sbi
->ll_dt_exp
, &md
.lsm
);
3693 /* refresh layout failed, need to wait */
3694 wait_layout
= rc
== -EBUSY
;
3697 LDLM_LOCK_PUT(lock
);
3698 ldlm_lock_decref(lockh
, mode
);
3700 /* wait for IO to complete if it's still being used. */
3702 CDEBUG(D_INODE
, "%s: "DFID
"(%p) wait for layout reconf\n",
3703 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3704 PFID(&lli
->lli_fid
), inode
);
3706 memset(&conf
, 0, sizeof(conf
));
3707 conf
.coc_opc
= OBJECT_CONF_WAIT
;
3708 conf
.coc_inode
= inode
;
3709 rc
= ll_layout_conf(inode
, &conf
);
3713 CDEBUG(D_INODE
, "%s: file="DFID
" waiting layout return: %d.\n",
3714 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3715 PFID(&lli
->lli_fid
), rc
);
3721 * This function checks if there exists a LAYOUT lock on the client side,
3722 * or enqueues it if it doesn't have one in cache.
3724 * This function will not hold layout lock so it may be revoked any time after
3725 * this function returns. Any operations depend on layout should be redone
3728 * This function should be called before lov_io_init() to get an uptodate
3729 * layout version, the caller should save the version number and after IO
3730 * is finished, this function should be called again to verify that layout
3731 * is not changed during IO time.
3733 int ll_layout_refresh(struct inode
*inode
, __u32
*gen
)
3735 struct ll_inode_info
*lli
= ll_i2info(inode
);
3736 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3737 struct md_op_data
*op_data
;
3738 struct lookup_intent it
;
3739 struct lustre_handle lockh
;
3740 enum ldlm_mode mode
;
3741 struct ldlm_enqueue_info einfo
= {
3742 .ei_type
= LDLM_IBITS
,
3744 .ei_cb_bl
= &ll_md_blocking_ast
,
3745 .ei_cb_cp
= &ldlm_completion_ast
,
3749 *gen
= ll_layout_version_get(lli
);
3750 if (!(sbi
->ll_flags
& LL_SBI_LAYOUT_LOCK
) || *gen
!= LL_LAYOUT_GEN_NONE
)
3754 LASSERT(fid_is_sane(ll_inode2fid(inode
)));
3755 LASSERT(S_ISREG(inode
->i_mode
));
3757 /* take layout lock mutex to enqueue layout lock exclusively. */
3758 mutex_lock(&lli
->lli_layout_mutex
);
3761 /* mostly layout lock is caching on the local side, so try to match
3762 * it before grabbing layout lock mutex.
3764 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3765 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3766 if (mode
!= 0) { /* hit cached lock */
3767 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3771 mutex_unlock(&lli
->lli_layout_mutex
);
3775 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
,
3776 0, 0, LUSTRE_OPC_ANY
, NULL
);
3777 if (IS_ERR(op_data
)) {
3778 mutex_unlock(&lli
->lli_layout_mutex
);
3779 return PTR_ERR(op_data
);
3782 /* have to enqueue one */
3783 memset(&it
, 0, sizeof(it
));
3784 it
.it_op
= IT_LAYOUT
;
3785 lockh
.cookie
= 0ULL;
3787 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID
"(%p)",
3788 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3789 PFID(&lli
->lli_fid
), inode
);
3791 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
, &it
, op_data
, &lockh
, 0);
3792 ptlrpc_req_finished(it
.it_request
);
3793 it
.it_request
= NULL
;
3795 ll_finish_md_op_data(op_data
);
3797 mode
= it
.it_lock_mode
;
3798 it
.it_lock_mode
= 0;
3799 ll_intent_drop_lock(&it
);
3802 /* set lock data in case this is a new lock */
3803 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
3804 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3808 mutex_unlock(&lli
->lli_layout_mutex
);
3814 * This function send a restore request to the MDT
3816 int ll_layout_restore(struct inode
*inode
, loff_t offset
, __u64 length
)
3818 struct hsm_user_request
*hur
;
3821 len
= sizeof(struct hsm_user_request
) +
3822 sizeof(struct hsm_user_item
);
3823 hur
= kzalloc(len
, GFP_NOFS
);
3827 hur
->hur_request
.hr_action
= HUA_RESTORE
;
3828 hur
->hur_request
.hr_archive_id
= 0;
3829 hur
->hur_request
.hr_flags
= 0;
3830 memcpy(&hur
->hur_user_item
[0].hui_fid
, &ll_i2info(inode
)->lli_fid
,
3831 sizeof(hur
->hur_user_item
[0].hui_fid
));
3832 hur
->hur_user_item
[0].hui_extent
.offset
= offset
;
3833 hur
->hur_user_item
[0].hui_extent
.length
= length
;
3834 hur
->hur_request
.hr_itemcount
= 1;
3835 rc
= obd_iocontrol(LL_IOC_HSM_REQUEST
, ll_i2sbi(inode
)->ll_md_exp
,