4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
34 * Author: Peter Braam <braam@clusterfs.com>
35 * Author: Phil Schwan <phil@clusterfs.com>
36 * Author: Andreas Dilger <adilger@clusterfs.com>
39 #define DEBUG_SUBSYSTEM S_LLITE
40 #include "../include/lustre_dlm.h"
41 #include "../include/lustre_lite.h"
42 #include <linux/pagemap.h>
43 #include <linux/file.h>
44 #include <linux/mount.h>
45 #include "llite_internal.h"
46 #include "../include/lustre/ll_fiemap.h"
48 #include "../include/cl_object.h"
51 ll_put_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
);
53 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
56 static enum llioc_iter
57 ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
58 unsigned int cmd
, unsigned long arg
, int *rcp
);
60 static struct ll_file_data
*ll_file_data_get(void)
62 struct ll_file_data
*fd
;
64 fd
= kmem_cache_zalloc(ll_file_data_slab
, GFP_NOFS
);
67 fd
->fd_write_failed
= false;
71 static void ll_file_data_put(struct ll_file_data
*fd
)
74 kmem_cache_free(ll_file_data_slab
, fd
);
77 void ll_pack_inode2opdata(struct inode
*inode
, struct md_op_data
*op_data
,
78 struct lustre_handle
*fh
)
80 op_data
->op_fid1
= ll_i2info(inode
)->lli_fid
;
81 op_data
->op_attr
.ia_mode
= inode
->i_mode
;
82 op_data
->op_attr
.ia_atime
= inode
->i_atime
;
83 op_data
->op_attr
.ia_mtime
= inode
->i_mtime
;
84 op_data
->op_attr
.ia_ctime
= inode
->i_ctime
;
85 op_data
->op_attr
.ia_size
= i_size_read(inode
);
86 op_data
->op_attr_blocks
= inode
->i_blocks
;
87 op_data
->op_attr_flags
= ll_inode_to_ext_flags(inode
->i_flags
);
88 op_data
->op_ioepoch
= ll_i2info(inode
)->lli_ioepoch
;
90 op_data
->op_handle
= *fh
;
92 if (ll_i2info(inode
)->lli_flags
& LLIF_DATA_MODIFIED
)
93 op_data
->op_bias
|= MDS_DATA_MODIFIED
;
97 * Closes the IO epoch and packs all the attributes into @op_data for
100 static void ll_prepare_close(struct inode
*inode
, struct md_op_data
*op_data
,
101 struct obd_client_handle
*och
)
103 op_data
->op_attr
.ia_valid
= ATTR_MODE
| ATTR_ATIME
| ATTR_ATIME_SET
|
104 ATTR_MTIME
| ATTR_MTIME_SET
|
105 ATTR_CTIME
| ATTR_CTIME_SET
;
107 if (!(och
->och_flags
& FMODE_WRITE
))
110 if (!exp_connect_som(ll_i2mdexp(inode
)) || !S_ISREG(inode
->i_mode
))
111 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
113 ll_ioepoch_close(inode
, op_data
, &och
, 0);
116 ll_pack_inode2opdata(inode
, op_data
, &och
->och_fh
);
117 ll_prep_md_op_data(op_data
, inode
, NULL
, NULL
,
118 0, 0, LUSTRE_OPC_ANY
, NULL
);
121 static int ll_close_inode_openhandle(struct obd_export
*md_exp
,
123 struct obd_client_handle
*och
,
124 const __u64
*data_version
)
126 struct obd_export
*exp
= ll_i2mdexp(inode
);
127 struct md_op_data
*op_data
;
128 struct ptlrpc_request
*req
= NULL
;
129 struct obd_device
*obd
= class_exp2obd(exp
);
135 * XXX: in case of LMV, is this correct to access
138 CERROR("Invalid MDC connection handle %#llx\n",
139 ll_i2mdexp(inode
)->exp_handle
.h_cookie
);
144 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
146 /* XXX We leak openhandle and request here. */
151 ll_prepare_close(inode
, op_data
, och
);
153 /* Pass in data_version implies release. */
154 op_data
->op_bias
|= MDS_HSM_RELEASE
;
155 op_data
->op_data_version
= *data_version
;
156 op_data
->op_lease_handle
= och
->och_lease_handle
;
157 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
159 epoch_close
= op_data
->op_flags
& MF_EPOCH_CLOSE
;
160 rc
= md_close(md_exp
, op_data
, och
->och_mod
, &req
);
162 /* This close must have the epoch closed. */
163 LASSERT(epoch_close
);
164 /* MDS has instructed us to obtain Size-on-MDS attribute from
165 * OSTs and send setattr to back to MDS.
167 rc
= ll_som_update(inode
, op_data
);
169 CERROR("%s: inode "DFID
" mdc Size-on-MDS update failed: rc = %d\n",
170 ll_i2mdexp(inode
)->exp_obd
->obd_name
,
171 PFID(ll_inode2fid(inode
)), rc
);
175 CERROR("%s: inode "DFID
" mdc close failed: rc = %d\n",
176 ll_i2mdexp(inode
)->exp_obd
->obd_name
,
177 PFID(ll_inode2fid(inode
)), rc
);
180 /* DATA_MODIFIED flag was successfully sent on close, cancel data
183 if (rc
== 0 && (op_data
->op_bias
& MDS_DATA_MODIFIED
)) {
184 struct ll_inode_info
*lli
= ll_i2info(inode
);
186 spin_lock(&lli
->lli_lock
);
187 lli
->lli_flags
&= ~LLIF_DATA_MODIFIED
;
188 spin_unlock(&lli
->lli_lock
);
192 rc
= ll_objects_destroy(req
, inode
);
194 CERROR("inode %lu ll_objects destroy: rc = %d\n",
197 if (rc
== 0 && op_data
->op_bias
& MDS_HSM_RELEASE
) {
198 struct mdt_body
*body
;
200 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
201 if (!(body
->valid
& OBD_MD_FLRELEASED
))
205 ll_finish_md_op_data(op_data
);
208 if (exp_connect_som(exp
) && !epoch_close
&&
209 S_ISREG(inode
->i_mode
) && (och
->och_flags
& FMODE_WRITE
)) {
210 ll_queue_done_writing(inode
, LLIF_DONE_WRITING
);
212 md_clear_open_replay_data(md_exp
, och
);
213 /* Free @och if it is not waiting for DONE_WRITING. */
214 och
->och_fh
.cookie
= DEAD_HANDLE_MAGIC
;
217 if (req
) /* This is close request */
218 ptlrpc_req_finished(req
);
222 int ll_md_real_close(struct inode
*inode
, fmode_t fmode
)
224 struct ll_inode_info
*lli
= ll_i2info(inode
);
225 struct obd_client_handle
**och_p
;
226 struct obd_client_handle
*och
;
230 if (fmode
& FMODE_WRITE
) {
231 och_p
= &lli
->lli_mds_write_och
;
232 och_usecount
= &lli
->lli_open_fd_write_count
;
233 } else if (fmode
& FMODE_EXEC
) {
234 och_p
= &lli
->lli_mds_exec_och
;
235 och_usecount
= &lli
->lli_open_fd_exec_count
;
237 LASSERT(fmode
& FMODE_READ
);
238 och_p
= &lli
->lli_mds_read_och
;
239 och_usecount
= &lli
->lli_open_fd_read_count
;
242 mutex_lock(&lli
->lli_och_mutex
);
243 if (*och_usecount
> 0) {
244 /* There are still users of this handle, so skip
247 mutex_unlock(&lli
->lli_och_mutex
);
253 mutex_unlock(&lli
->lli_och_mutex
);
256 /* There might be a race and this handle may already
259 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
266 static int ll_md_close(struct obd_export
*md_exp
, struct inode
*inode
,
269 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
270 struct ll_inode_info
*lli
= ll_i2info(inode
);
272 __u64 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_TEST_LOCK
;
273 struct lustre_handle lockh
;
274 ldlm_policy_data_t policy
= {.l_inodebits
= {MDS_INODELOCK_OPEN
} };
277 /* clear group lock, if present */
278 if (unlikely(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
))
279 ll_put_grouplock(inode
, file
, fd
->fd_grouplock
.lg_gid
);
281 if (fd
->fd_lease_och
) {
284 /* Usually the lease is not released when the
285 * application crashed, we need to release here.
287 rc
= ll_lease_close(fd
->fd_lease_och
, inode
, &lease_broken
);
288 CDEBUG(rc
? D_ERROR
: D_INODE
,
289 "Clean up lease " DFID
" %d/%d\n",
290 PFID(&lli
->lli_fid
), rc
, lease_broken
);
292 fd
->fd_lease_och
= NULL
;
296 rc
= ll_close_inode_openhandle(md_exp
, inode
, fd
->fd_och
, NULL
);
301 /* Let's see if we have good enough OPEN lock on the file and if
302 * we can skip talking to MDS
305 mutex_lock(&lli
->lli_och_mutex
);
306 if (fd
->fd_omode
& FMODE_WRITE
) {
308 LASSERT(lli
->lli_open_fd_write_count
);
309 lli
->lli_open_fd_write_count
--;
310 } else if (fd
->fd_omode
& FMODE_EXEC
) {
312 LASSERT(lli
->lli_open_fd_exec_count
);
313 lli
->lli_open_fd_exec_count
--;
316 LASSERT(lli
->lli_open_fd_read_count
);
317 lli
->lli_open_fd_read_count
--;
319 mutex_unlock(&lli
->lli_och_mutex
);
321 if (!md_lock_match(md_exp
, flags
, ll_inode2fid(inode
),
322 LDLM_IBITS
, &policy
, lockmode
, &lockh
))
323 rc
= ll_md_real_close(inode
, fd
->fd_omode
);
326 LUSTRE_FPRIVATE(file
) = NULL
;
327 ll_file_data_put(fd
);
332 /* While this returns an error code, fput() the caller does not, so we need
333 * to make every effort to clean up all of our state here. Also, applications
334 * rarely check close errors and even if an error is returned they will not
335 * re-try the close call.
337 int ll_file_release(struct inode
*inode
, struct file
*file
)
339 struct ll_file_data
*fd
;
340 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
341 struct ll_inode_info
*lli
= ll_i2info(inode
);
344 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p)\n",
345 PFID(ll_inode2fid(inode
)), inode
);
347 if (!is_root_inode(inode
))
348 ll_stats_ops_tally(sbi
, LPROC_LL_RELEASE
, 1);
349 fd
= LUSTRE_FPRIVATE(file
);
352 /* The last ref on @file, maybe not be the owner pid of statahead.
353 * Different processes can open the same dir, "ll_opendir_key" means:
354 * it is me that should stop the statahead thread.
356 if (S_ISDIR(inode
->i_mode
) && lli
->lli_opendir_key
== fd
&&
357 lli
->lli_opendir_pid
!= 0)
358 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
360 if (is_root_inode(inode
)) {
361 LUSTRE_FPRIVATE(file
) = NULL
;
362 ll_file_data_put(fd
);
366 if (!S_ISDIR(inode
->i_mode
)) {
367 lov_read_and_clear_async_rc(lli
->lli_clob
);
368 lli
->lli_async_rc
= 0;
371 rc
= ll_md_close(sbi
->ll_md_exp
, inode
, file
);
373 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG
, cfs_fail_val
))
374 libcfs_debug_dumplog();
379 static int ll_intent_file_open(struct dentry
*dentry
, void *lmm
,
380 int lmmsize
, struct lookup_intent
*itp
)
382 struct inode
*inode
= d_inode(dentry
);
383 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
384 struct dentry
*parent
= dentry
->d_parent
;
385 const char *name
= dentry
->d_name
.name
;
386 const int len
= dentry
->d_name
.len
;
387 struct md_op_data
*op_data
;
388 struct ptlrpc_request
*req
;
389 __u32 opc
= LUSTRE_OPC_ANY
;
392 /* Usually we come here only for NFSD, and we want open lock. */
393 /* We can also get here if there was cached open handle in revalidate_it
394 * but it disappeared while we were getting from there to ll_file_open.
395 * But this means this file was closed and immediately opened which
396 * makes a good candidate for using OPEN lock
398 /* If lmmsize & lmm are not 0, we are just setting stripe info
399 * parameters. No need for the open lock
401 if (!lmm
&& lmmsize
== 0) {
402 struct ll_dentry_data
*ldd
= ll_d2d(dentry
);
404 * If we came via ll_iget_for_nfs, then we need to request
405 * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry);
407 * NB: when ldd is NULL, it must have come via normal
408 * lookup path only, since ll_iget_for_nfs always calls
411 if (ldd
&& ldd
->lld_nfs_dentry
) {
412 ldd
->lld_nfs_dentry
= 0;
413 itp
->it_flags
|= MDS_OPEN_LOCK
;
415 if (itp
->it_flags
& FMODE_WRITE
)
416 opc
= LUSTRE_OPC_CREATE
;
419 op_data
= ll_prep_md_op_data(NULL
, d_inode(parent
),
423 return PTR_ERR(op_data
);
425 itp
->it_flags
|= MDS_OPEN_BY_FID
;
426 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, lmm
, lmmsize
, itp
,
427 0 /*unused */, &req
, ll_md_blocking_ast
, 0);
428 ll_finish_md_op_data(op_data
);
430 /* reason for keep own exit path - don`t flood log
431 * with messages with -ESTALE errors.
433 if (!it_disposition(itp
, DISP_OPEN_OPEN
) ||
434 it_open_error(DISP_OPEN_OPEN
, itp
))
436 ll_release_openhandle(inode
, itp
);
440 if (it_disposition(itp
, DISP_LOOKUP_NEG
)) {
445 if (rc
!= 0 || it_open_error(DISP_OPEN_OPEN
, itp
)) {
446 rc
= rc
? rc
: it_open_error(DISP_OPEN_OPEN
, itp
);
447 CDEBUG(D_VFSTRACE
, "lock enqueue: err: %d\n", rc
);
451 rc
= ll_prep_inode(&inode
, req
, NULL
, itp
);
452 if (!rc
&& itp
->it_lock_mode
)
453 ll_set_lock_data(sbi
->ll_md_exp
, inode
, itp
, NULL
);
456 ptlrpc_req_finished(req
);
457 ll_intent_drop_lock(itp
);
463 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
464 * not believe attributes if a few ioepoch holders exist. Attributes for
465 * previous ioepoch if new one is opened are also skipped by MDS.
467 void ll_ioepoch_open(struct ll_inode_info
*lli
, __u64 ioepoch
)
469 if (ioepoch
&& lli
->lli_ioepoch
!= ioepoch
) {
470 lli
->lli_ioepoch
= ioepoch
;
471 CDEBUG(D_INODE
, "Epoch %llu opened on "DFID
"\n",
472 ioepoch
, PFID(&lli
->lli_fid
));
476 static int ll_och_fill(struct obd_export
*md_exp
, struct lookup_intent
*it
,
477 struct obd_client_handle
*och
)
479 struct mdt_body
*body
;
481 body
= req_capsule_server_get(&it
->it_request
->rq_pill
, &RMF_MDT_BODY
);
482 och
->och_fh
= body
->handle
;
483 och
->och_fid
= body
->fid1
;
484 och
->och_lease_handle
.cookie
= it
->it_lock_handle
;
485 och
->och_magic
= OBD_CLIENT_HANDLE_MAGIC
;
486 och
->och_flags
= it
->it_flags
;
488 return md_set_open_replay_data(md_exp
, och
, it
);
491 static int ll_local_open(struct file
*file
, struct lookup_intent
*it
,
492 struct ll_file_data
*fd
, struct obd_client_handle
*och
)
494 struct inode
*inode
= file_inode(file
);
495 struct ll_inode_info
*lli
= ll_i2info(inode
);
497 LASSERT(!LUSTRE_FPRIVATE(file
));
502 struct mdt_body
*body
;
505 rc
= ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
509 body
= req_capsule_server_get(&it
->it_request
->rq_pill
,
511 ll_ioepoch_open(lli
, body
->ioepoch
);
514 LUSTRE_FPRIVATE(file
) = fd
;
515 ll_readahead_init(inode
, &fd
->fd_ras
);
516 fd
->fd_omode
= it
->it_flags
& (FMODE_READ
| FMODE_WRITE
| FMODE_EXEC
);
518 /* ll_cl_context initialize */
519 rwlock_init(&fd
->fd_lock
);
520 INIT_LIST_HEAD(&fd
->fd_lccs
);
525 /* Open a file, and (for the very first open) create objects on the OSTs at
526 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
527 * creation or open until ll_lov_setstripe() ioctl is called.
529 * If we already have the stripe MD locally then we don't request it in
530 * md_open(), by passing a lmm_size = 0.
532 * It is up to the application to ensure no other processes open this file
533 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
534 * used. We might be able to avoid races of that sort by getting lli_open_sem
535 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
536 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
538 int ll_file_open(struct inode
*inode
, struct file
*file
)
540 struct ll_inode_info
*lli
= ll_i2info(inode
);
541 struct lookup_intent
*it
, oit
= { .it_op
= IT_OPEN
,
542 .it_flags
= file
->f_flags
};
543 struct obd_client_handle
**och_p
= NULL
;
544 __u64
*och_usecount
= NULL
;
545 struct ll_file_data
*fd
;
546 int rc
= 0, opendir_set
= 0;
548 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), flags %o\n",
549 PFID(ll_inode2fid(inode
)), inode
, file
->f_flags
);
551 it
= file
->private_data
; /* XXX: compat macro */
552 file
->private_data
= NULL
; /* prevent ll_local_open assertion */
554 fd
= ll_file_data_get();
561 if (S_ISDIR(inode
->i_mode
)) {
562 spin_lock(&lli
->lli_sa_lock
);
563 if (!lli
->lli_opendir_key
&& !lli
->lli_sai
&&
564 lli
->lli_opendir_pid
== 0) {
565 lli
->lli_opendir_key
= fd
;
566 lli
->lli_opendir_pid
= current_pid();
569 spin_unlock(&lli
->lli_sa_lock
);
572 if (is_root_inode(inode
)) {
573 LUSTRE_FPRIVATE(file
) = fd
;
577 if (!it
|| !it
->it_disposition
) {
578 /* Convert f_flags into access mode. We cannot use file->f_mode,
579 * because everything but O_ACCMODE mask was stripped from
582 if ((oit
.it_flags
+ 1) & O_ACCMODE
)
584 if (file
->f_flags
& O_TRUNC
)
585 oit
.it_flags
|= FMODE_WRITE
;
587 /* kernel only call f_op->open in dentry_open. filp_open calls
588 * dentry_open after call to open_namei that checks permissions.
589 * Only nfsd_open call dentry_open directly without checking
590 * permissions and because of that this code below is safe.
592 if (oit
.it_flags
& (FMODE_WRITE
| FMODE_READ
))
593 oit
.it_flags
|= MDS_OPEN_OWNEROVERRIDE
;
595 /* We do not want O_EXCL here, presumably we opened the file
596 * already? XXX - NFS implications?
598 oit
.it_flags
&= ~O_EXCL
;
600 /* bug20584, if "it_flags" contains O_CREAT, the file will be
601 * created if necessary, then "IT_CREAT" should be set to keep
604 if (oit
.it_flags
& O_CREAT
)
605 oit
.it_op
|= IT_CREAT
;
611 /* Let's see if we have file open on MDS already. */
612 if (it
->it_flags
& FMODE_WRITE
) {
613 och_p
= &lli
->lli_mds_write_och
;
614 och_usecount
= &lli
->lli_open_fd_write_count
;
615 } else if (it
->it_flags
& FMODE_EXEC
) {
616 och_p
= &lli
->lli_mds_exec_och
;
617 och_usecount
= &lli
->lli_open_fd_exec_count
;
619 och_p
= &lli
->lli_mds_read_och
;
620 och_usecount
= &lli
->lli_open_fd_read_count
;
623 mutex_lock(&lli
->lli_och_mutex
);
624 if (*och_p
) { /* Open handle is present */
625 if (it_disposition(it
, DISP_OPEN_OPEN
)) {
626 /* Well, there's extra open request that we do not need,
627 * let's close it somehow. This will decref request.
629 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
631 mutex_unlock(&lli
->lli_och_mutex
);
635 ll_release_openhandle(inode
, it
);
639 rc
= ll_local_open(file
, it
, fd
, NULL
);
642 mutex_unlock(&lli
->lli_och_mutex
);
646 LASSERT(*och_usecount
== 0);
647 if (!it
->it_disposition
) {
648 /* We cannot just request lock handle now, new ELC code
649 * means that one of other OPEN locks for this file
650 * could be cancelled, and since blocking ast handler
651 * would attempt to grab och_mutex as well, that would
652 * result in a deadlock
654 mutex_unlock(&lli
->lli_och_mutex
);
655 it
->it_create_mode
|= M_CHECK_STALE
;
656 rc
= ll_intent_file_open(file
->f_path
.dentry
, NULL
, 0, it
);
657 it
->it_create_mode
&= ~M_CHECK_STALE
;
663 *och_p
= kzalloc(sizeof(struct obd_client_handle
), GFP_NOFS
);
671 /* md_intent_lock() didn't get a request ref if there was an
672 * open error, so don't do cleanup on the request here
675 /* XXX (green): Should not we bail out on any error here, not
678 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
682 LASSERTF(it_disposition(it
, DISP_ENQ_OPEN_REF
),
683 "inode %p: disposition %x, status %d\n", inode
,
684 it_disposition(it
, ~0), it
->it_status
);
686 rc
= ll_local_open(file
, it
, fd
, *och_p
);
690 mutex_unlock(&lli
->lli_och_mutex
);
693 /* Must do this outside lli_och_mutex lock to prevent deadlock where
694 * different kind of OPEN lock for this same inode gets cancelled
697 if (!S_ISREG(inode
->i_mode
))
700 if (!lli
->lli_has_smd
&&
701 (cl_is_lov_delay_create(file
->f_flags
) ||
702 (file
->f_mode
& FMODE_WRITE
) == 0)) {
703 CDEBUG(D_INODE
, "object creation was delayed\n");
706 cl_lov_delay_create_clear(&file
->f_flags
);
711 if (och_p
&& *och_p
) {
716 mutex_unlock(&lli
->lli_och_mutex
);
719 if (opendir_set
!= 0)
720 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
721 ll_file_data_put(fd
);
723 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_OPEN
, 1);
726 if (it
&& it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
727 ptlrpc_req_finished(it
->it_request
);
728 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
734 static int ll_md_blocking_lease_ast(struct ldlm_lock
*lock
,
735 struct ldlm_lock_desc
*desc
,
736 void *data
, int flag
)
739 struct lustre_handle lockh
;
742 case LDLM_CB_BLOCKING
:
743 ldlm_lock2handle(lock
, &lockh
);
744 rc
= ldlm_cli_cancel(&lockh
, LCF_ASYNC
);
746 CDEBUG(D_INODE
, "ldlm_cli_cancel: %d\n", rc
);
750 case LDLM_CB_CANCELING
:
758 * Acquire a lease and open the file.
760 static struct obd_client_handle
*
761 ll_lease_open(struct inode
*inode
, struct file
*file
, fmode_t fmode
,
764 struct lookup_intent it
= { .it_op
= IT_OPEN
};
765 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
766 struct md_op_data
*op_data
;
767 struct ptlrpc_request
*req
;
768 struct lustre_handle old_handle
= { 0 };
769 struct obd_client_handle
*och
= NULL
;
773 if (fmode
!= FMODE_WRITE
&& fmode
!= FMODE_READ
)
774 return ERR_PTR(-EINVAL
);
777 struct ll_inode_info
*lli
= ll_i2info(inode
);
778 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
779 struct obd_client_handle
**och_p
;
782 if (!(fmode
& file
->f_mode
) || (file
->f_mode
& FMODE_EXEC
))
783 return ERR_PTR(-EPERM
);
785 /* Get the openhandle of the file */
787 mutex_lock(&lli
->lli_och_mutex
);
788 if (fd
->fd_lease_och
) {
789 mutex_unlock(&lli
->lli_och_mutex
);
794 if (file
->f_mode
& FMODE_WRITE
) {
795 LASSERT(lli
->lli_mds_write_och
);
796 och_p
= &lli
->lli_mds_write_och
;
797 och_usecount
= &lli
->lli_open_fd_write_count
;
799 LASSERT(lli
->lli_mds_read_och
);
800 och_p
= &lli
->lli_mds_read_och
;
801 och_usecount
= &lli
->lli_open_fd_read_count
;
803 if (*och_usecount
== 1) {
810 mutex_unlock(&lli
->lli_och_mutex
);
811 if (rc
< 0) /* more than 1 opener */
815 old_handle
= fd
->fd_och
->och_fh
;
818 och
= kzalloc(sizeof(*och
), GFP_NOFS
);
820 return ERR_PTR(-ENOMEM
);
822 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
, 0, 0,
823 LUSTRE_OPC_ANY
, NULL
);
824 if (IS_ERR(op_data
)) {
825 rc
= PTR_ERR(op_data
);
829 /* To tell the MDT this openhandle is from the same owner */
830 op_data
->op_handle
= old_handle
;
832 it
.it_flags
= fmode
| open_flags
;
833 it
.it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
| MDS_OPEN_LEASE
;
834 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, NULL
, 0, &it
, 0, &req
,
835 ll_md_blocking_lease_ast
,
836 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
837 * it can be cancelled which may mislead applications that the lease is
839 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
840 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
841 * doesn't deal with openhandle, so normal openhandle will be leaked.
843 LDLM_FL_NO_LRU
| LDLM_FL_EXCL
);
844 ll_finish_md_op_data(op_data
);
845 ptlrpc_req_finished(req
);
849 if (it_disposition(&it
, DISP_LOOKUP_NEG
)) {
854 rc
= it_open_error(DISP_OPEN_OPEN
, &it
);
858 LASSERT(it_disposition(&it
, DISP_ENQ_OPEN_REF
));
859 ll_och_fill(sbi
->ll_md_exp
, &it
, och
);
861 if (!it_disposition(&it
, DISP_OPEN_LEASE
)) /* old server? */ {
866 /* already get lease, handle lease lock */
867 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
868 if (it
.it_lock_mode
== 0 ||
869 it
.it_lock_bits
!= MDS_INODELOCK_OPEN
) {
870 /* open lock must return for lease */
871 CERROR(DFID
"lease granted but no open lock, %d/%llu.\n",
872 PFID(ll_inode2fid(inode
)), it
.it_lock_mode
,
878 ll_intent_release(&it
);
882 /* Cancel open lock */
883 if (it
.it_lock_mode
!= 0) {
884 ldlm_lock_decref_and_cancel(&och
->och_lease_handle
,
887 och
->och_lease_handle
.cookie
= 0ULL;
889 rc2
= ll_close_inode_openhandle(sbi
->ll_md_exp
, inode
, och
, NULL
);
891 CERROR("%s: error closing file "DFID
": %d\n",
892 ll_get_fsname(inode
->i_sb
, NULL
, 0),
893 PFID(&ll_i2info(inode
)->lli_fid
), rc2
);
894 och
= NULL
; /* och has been freed in ll_close_inode_openhandle() */
896 ll_intent_release(&it
);
903 * Release lease and close the file.
904 * It will check if the lease has ever broken.
906 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
909 struct ldlm_lock
*lock
;
910 bool cancelled
= true;
913 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
915 lock_res_and_lock(lock
);
916 cancelled
= ldlm_is_cancel(lock
);
917 unlock_res_and_lock(lock
);
921 CDEBUG(D_INODE
, "lease for " DFID
" broken? %d\n",
922 PFID(&ll_i2info(inode
)->lli_fid
), cancelled
);
925 ldlm_cli_cancel(&och
->och_lease_handle
, 0);
927 *lease_broken
= cancelled
;
929 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
934 /* Fills the obdo with the attributes for the lsm */
935 static int ll_lsm_getattr(struct lov_stripe_md
*lsm
, struct obd_export
*exp
,
936 struct obdo
*obdo
, __u64 ioepoch
, int dv_flags
)
938 struct ptlrpc_request_set
*set
;
939 struct obd_info oinfo
= { };
946 oinfo
.oi_oa
->o_oi
= lsm
->lsm_oi
;
947 oinfo
.oi_oa
->o_mode
= S_IFREG
;
948 oinfo
.oi_oa
->o_ioepoch
= ioepoch
;
949 oinfo
.oi_oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLTYPE
|
950 OBD_MD_FLSIZE
| OBD_MD_FLBLOCKS
|
951 OBD_MD_FLBLKSZ
| OBD_MD_FLATIME
|
952 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
|
953 OBD_MD_FLGROUP
| OBD_MD_FLEPOCH
|
954 OBD_MD_FLDATAVERSION
;
955 if (dv_flags
& (LL_DV_WR_FLUSH
| LL_DV_RD_FLUSH
)) {
956 oinfo
.oi_oa
->o_valid
|= OBD_MD_FLFLAGS
;
957 oinfo
.oi_oa
->o_flags
|= OBD_FL_SRVLOCK
;
958 if (dv_flags
& LL_DV_WR_FLUSH
)
959 oinfo
.oi_oa
->o_flags
|= OBD_FL_FLUSH
;
962 set
= ptlrpc_prep_set();
964 CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM
);
967 rc
= obd_getattr_async(exp
, &oinfo
, set
);
969 rc
= ptlrpc_set_wait(set
);
970 ptlrpc_set_destroy(set
);
973 oinfo
.oi_oa
->o_valid
&= (OBD_MD_FLBLOCKS
| OBD_MD_FLBLKSZ
|
974 OBD_MD_FLATIME
| OBD_MD_FLMTIME
|
975 OBD_MD_FLCTIME
| OBD_MD_FLSIZE
|
976 OBD_MD_FLDATAVERSION
| OBD_MD_FLFLAGS
);
977 if (dv_flags
& LL_DV_WR_FLUSH
&&
978 !(oinfo
.oi_oa
->o_valid
& OBD_MD_FLFLAGS
&&
979 oinfo
.oi_oa
->o_flags
& OBD_FL_FLUSH
))
986 * Performs the getattr on the inode and updates its fields.
987 * If @sync != 0, perform the getattr under the server-side lock.
989 int ll_inode_getattr(struct inode
*inode
, struct obdo
*obdo
,
990 __u64 ioepoch
, int sync
)
992 struct lov_stripe_md
*lsm
;
995 lsm
= ccc_inode_lsm_get(inode
);
996 rc
= ll_lsm_getattr(lsm
, ll_i2dtexp(inode
),
997 obdo
, ioepoch
, sync
? LL_DV_RD_FLUSH
: 0);
999 struct ost_id
*oi
= lsm
? &lsm
->lsm_oi
: &obdo
->o_oi
;
1001 obdo_refresh_inode(inode
, obdo
, obdo
->o_valid
);
1002 CDEBUG(D_INODE
, "objid " DOSTID
" size %llu, blocks %llu, blksize %lu\n",
1003 POSTID(oi
), i_size_read(inode
),
1004 (unsigned long long)inode
->i_blocks
,
1005 1UL << inode
->i_blkbits
);
1007 ccc_inode_lsm_put(inode
, lsm
);
1011 int ll_merge_attr(const struct lu_env
*env
, struct inode
*inode
)
1013 struct ll_inode_info
*lli
= ll_i2info(inode
);
1014 struct cl_object
*obj
= lli
->lli_clob
;
1015 struct cl_attr
*attr
= vvp_env_thread_attr(env
);
1021 ll_inode_size_lock(inode
);
1023 /* merge timestamps the most recently obtained from mds with
1024 * timestamps obtained from osts
1026 LTIME_S(inode
->i_atime
) = lli
->lli_atime
;
1027 LTIME_S(inode
->i_mtime
) = lli
->lli_mtime
;
1028 LTIME_S(inode
->i_ctime
) = lli
->lli_ctime
;
1030 mtime
= LTIME_S(inode
->i_mtime
);
1031 atime
= LTIME_S(inode
->i_atime
);
1032 ctime
= LTIME_S(inode
->i_ctime
);
1034 cl_object_attr_lock(obj
);
1035 rc
= cl_object_attr_get(env
, obj
, attr
);
1036 cl_object_attr_unlock(obj
);
1039 goto out_size_unlock
;
1041 if (atime
< attr
->cat_atime
)
1042 atime
= attr
->cat_atime
;
1044 if (ctime
< attr
->cat_ctime
)
1045 ctime
= attr
->cat_ctime
;
1047 if (mtime
< attr
->cat_mtime
)
1048 mtime
= attr
->cat_mtime
;
1050 CDEBUG(D_VFSTRACE
, DFID
" updating i_size %llu\n",
1051 PFID(&lli
->lli_fid
), attr
->cat_size
);
1053 i_size_write(inode
, attr
->cat_size
);
1055 inode
->i_blocks
= attr
->cat_blocks
;
1057 LTIME_S(inode
->i_mtime
) = mtime
;
1058 LTIME_S(inode
->i_atime
) = atime
;
1059 LTIME_S(inode
->i_ctime
) = ctime
;
1062 ll_inode_size_unlock(inode
);
1067 int ll_glimpse_ioctl(struct ll_sb_info
*sbi
, struct lov_stripe_md
*lsm
,
1070 struct obdo obdo
= { 0 };
1073 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, &obdo
, 0, 0);
1075 st
->st_size
= obdo
.o_size
;
1076 st
->st_blocks
= obdo
.o_blocks
;
1077 st
->st_mtime
= obdo
.o_mtime
;
1078 st
->st_atime
= obdo
.o_atime
;
1079 st
->st_ctime
= obdo
.o_ctime
;
1084 static bool file_is_noatime(const struct file
*file
)
1086 const struct vfsmount
*mnt
= file
->f_path
.mnt
;
1087 const struct inode
*inode
= file_inode(file
);
1089 /* Adapted from file_accessed() and touch_atime().*/
1090 if (file
->f_flags
& O_NOATIME
)
1093 if (inode
->i_flags
& S_NOATIME
)
1096 if (IS_NOATIME(inode
))
1099 if (mnt
->mnt_flags
& (MNT_NOATIME
| MNT_READONLY
))
1102 if ((mnt
->mnt_flags
& MNT_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1105 if ((inode
->i_sb
->s_flags
& MS_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1111 void ll_io_init(struct cl_io
*io
, const struct file
*file
, int write
)
1113 struct inode
*inode
= file_inode(file
);
1115 io
->u
.ci_rw
.crw_nonblock
= file
->f_flags
& O_NONBLOCK
;
1117 io
->u
.ci_wr
.wr_append
= !!(file
->f_flags
& O_APPEND
);
1118 io
->u
.ci_wr
.wr_sync
= file
->f_flags
& O_SYNC
||
1119 file
->f_flags
& O_DIRECT
||
1122 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
1123 io
->ci_lockreq
= CILR_MAYBE
;
1124 if (ll_file_nolock(file
)) {
1125 io
->ci_lockreq
= CILR_NEVER
;
1126 io
->ci_no_srvlock
= 1;
1127 } else if (file
->f_flags
& O_APPEND
) {
1128 io
->ci_lockreq
= CILR_MANDATORY
;
1131 io
->ci_noatime
= file_is_noatime(file
);
1135 ll_file_io_generic(const struct lu_env
*env
, struct vvp_io_args
*args
,
1136 struct file
*file
, enum cl_io_type iot
,
1137 loff_t
*ppos
, size_t count
)
1139 struct ll_inode_info
*lli
= ll_i2info(file_inode(file
));
1140 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1144 CDEBUG(D_VFSTRACE
, "file: %s, type: %d ppos: %llu, count: %zd\n",
1145 file
->f_path
.dentry
->d_name
.name
, iot
, *ppos
, count
);
1148 io
= vvp_env_thread_io(env
);
1149 ll_io_init(io
, file
, iot
== CIT_WRITE
);
1151 if (cl_io_rw_init(env
, io
, iot
, *ppos
, count
) == 0) {
1152 struct vvp_io
*vio
= vvp_env_io(env
);
1153 int write_mutex_locked
= 0;
1155 vio
->vui_fd
= LUSTRE_FPRIVATE(file
);
1156 vio
->vui_io_subtype
= args
->via_io_subtype
;
1158 switch (vio
->vui_io_subtype
) {
1160 vio
->vui_iter
= args
->u
.normal
.via_iter
;
1161 vio
->vui_iocb
= args
->u
.normal
.via_iocb
;
1162 if ((iot
== CIT_WRITE
) &&
1163 !(vio
->vui_fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1164 if (mutex_lock_interruptible(&lli
->
1166 result
= -ERESTARTSYS
;
1169 write_mutex_locked
= 1;
1171 down_read(&lli
->lli_trunc_sem
);
1174 vio
->u
.splice
.vui_pipe
= args
->u
.splice
.via_pipe
;
1175 vio
->u
.splice
.vui_flags
= args
->u
.splice
.via_flags
;
1178 CERROR("Unknown IO type - %u\n", vio
->vui_io_subtype
);
1181 ll_cl_add(file
, env
, io
);
1182 result
= cl_io_loop(env
, io
);
1183 ll_cl_remove(file
, env
);
1184 if (args
->via_io_subtype
== IO_NORMAL
)
1185 up_read(&lli
->lli_trunc_sem
);
1186 if (write_mutex_locked
)
1187 mutex_unlock(&lli
->lli_write_mutex
);
1189 /* cl_io_rw_init() handled IO */
1190 result
= io
->ci_result
;
1193 if (io
->ci_nob
> 0) {
1194 result
= io
->ci_nob
;
1195 *ppos
= io
->u
.ci_wr
.wr
.crw_pos
;
1199 cl_io_fini(env
, io
);
1200 /* If any bit been read/written (result != 0), we just return
1201 * short read/write instead of restart io.
1203 if ((result
== 0 || result
== -ENODATA
) && io
->ci_need_restart
) {
1204 CDEBUG(D_VFSTRACE
, "Restart %s on %pD from %lld, count:%zd\n",
1205 iot
== CIT_READ
? "read" : "write",
1206 file
, *ppos
, count
);
1207 LASSERTF(io
->ci_nob
== 0, "%zd\n", io
->ci_nob
);
1211 if (iot
== CIT_READ
) {
1213 ll_stats_ops_tally(ll_i2sbi(file_inode(file
)),
1214 LPROC_LL_READ_BYTES
, result
);
1215 } else if (iot
== CIT_WRITE
) {
1217 ll_stats_ops_tally(ll_i2sbi(file_inode(file
)),
1218 LPROC_LL_WRITE_BYTES
, result
);
1219 fd
->fd_write_failed
= false;
1220 } else if (result
!= -ERESTARTSYS
) {
1221 fd
->fd_write_failed
= true;
1224 CDEBUG(D_VFSTRACE
, "iot: %d, result: %zd\n", iot
, result
);
1229 static ssize_t
ll_file_read_iter(struct kiocb
*iocb
, struct iov_iter
*to
)
1232 struct vvp_io_args
*args
;
1236 env
= cl_env_get(&refcheck
);
1238 return PTR_ERR(env
);
1240 args
= ll_env_args(env
, IO_NORMAL
);
1241 args
->u
.normal
.via_iter
= to
;
1242 args
->u
.normal
.via_iocb
= iocb
;
1244 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_READ
,
1245 &iocb
->ki_pos
, iov_iter_count(to
));
1246 cl_env_put(env
, &refcheck
);
1251 * Write to a file (through the page cache).
1253 static ssize_t
ll_file_write_iter(struct kiocb
*iocb
, struct iov_iter
*from
)
1256 struct vvp_io_args
*args
;
1260 env
= cl_env_get(&refcheck
);
1262 return PTR_ERR(env
);
1264 args
= ll_env_args(env
, IO_NORMAL
);
1265 args
->u
.normal
.via_iter
= from
;
1266 args
->u
.normal
.via_iocb
= iocb
;
1268 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_WRITE
,
1269 &iocb
->ki_pos
, iov_iter_count(from
));
1270 cl_env_put(env
, &refcheck
);
1275 * Send file content (through pagecache) somewhere with helper
1277 static ssize_t
ll_file_splice_read(struct file
*in_file
, loff_t
*ppos
,
1278 struct pipe_inode_info
*pipe
, size_t count
,
1282 struct vvp_io_args
*args
;
1286 env
= cl_env_get(&refcheck
);
1288 return PTR_ERR(env
);
1290 args
= ll_env_args(env
, IO_SPLICE
);
1291 args
->u
.splice
.via_pipe
= pipe
;
1292 args
->u
.splice
.via_flags
= flags
;
1294 result
= ll_file_io_generic(env
, args
, in_file
, CIT_READ
, ppos
, count
);
1295 cl_env_put(env
, &refcheck
);
1299 static int ll_lov_recreate(struct inode
*inode
, struct ost_id
*oi
, u32 ost_idx
)
1301 struct obd_export
*exp
= ll_i2dtexp(inode
);
1302 struct obd_trans_info oti
= { 0 };
1303 struct obdo
*oa
= NULL
;
1306 struct lov_stripe_md
*lsm
= NULL
, *lsm2
;
1308 oa
= kmem_cache_zalloc(obdo_cachep
, GFP_NOFS
);
1312 lsm
= ccc_inode_lsm_get(inode
);
1313 if (!lsm_has_objects(lsm
)) {
1318 lsm_size
= sizeof(*lsm
) + (sizeof(struct lov_oinfo
) *
1319 (lsm
->lsm_stripe_count
));
1321 lsm2
= libcfs_kvzalloc(lsm_size
, GFP_NOFS
);
1328 oa
->o_nlink
= ost_idx
;
1329 oa
->o_flags
|= OBD_FL_RECREATE_OBJS
;
1330 oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLFLAGS
| OBD_MD_FLGROUP
;
1331 obdo_from_inode(oa
, inode
, OBD_MD_FLTYPE
| OBD_MD_FLATIME
|
1332 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
);
1333 obdo_set_parent_fid(oa
, &ll_i2info(inode
)->lli_fid
);
1334 memcpy(lsm2
, lsm
, lsm_size
);
1335 ll_inode_size_lock(inode
);
1336 rc
= obd_create(NULL
, exp
, oa
, &lsm2
, &oti
);
1337 ll_inode_size_unlock(inode
);
1342 ccc_inode_lsm_put(inode
, lsm
);
1343 kmem_cache_free(obdo_cachep
, oa
);
1347 static int ll_lov_recreate_obj(struct inode
*inode
, unsigned long arg
)
1349 struct ll_recreate_obj ucreat
;
1352 if (!capable(CFS_CAP_SYS_ADMIN
))
1355 if (copy_from_user(&ucreat
, (struct ll_recreate_obj __user
*)arg
,
1359 ostid_set_seq_mdt0(&oi
);
1360 ostid_set_id(&oi
, ucreat
.lrc_id
);
1361 return ll_lov_recreate(inode
, &oi
, ucreat
.lrc_ost_idx
);
1364 static int ll_lov_recreate_fid(struct inode
*inode
, unsigned long arg
)
1370 if (!capable(CFS_CAP_SYS_ADMIN
))
1373 if (copy_from_user(&fid
, (struct lu_fid __user
*)arg
, sizeof(fid
)))
1376 fid_to_ostid(&fid
, &oi
);
1377 ost_idx
= (fid_seq(&fid
) >> 16) & 0xffff;
1378 return ll_lov_recreate(inode
, &oi
, ost_idx
);
1381 int ll_lov_setstripe_ea_info(struct inode
*inode
, struct dentry
*dentry
,
1382 __u64 flags
, struct lov_user_md
*lum
,
1385 struct lov_stripe_md
*lsm
= NULL
;
1386 struct lookup_intent oit
= {.it_op
= IT_OPEN
, .it_flags
= flags
};
1389 lsm
= ccc_inode_lsm_get(inode
);
1391 ccc_inode_lsm_put(inode
, lsm
);
1392 CDEBUG(D_IOCTL
, "stripe already exists for inode "DFID
"\n",
1393 PFID(ll_inode2fid(inode
)));
1398 ll_inode_size_lock(inode
);
1399 rc
= ll_intent_file_open(dentry
, lum
, lum_size
, &oit
);
1406 ll_release_openhandle(inode
, &oit
);
1409 ll_inode_size_unlock(inode
);
1410 ll_intent_release(&oit
);
1411 ccc_inode_lsm_put(inode
, lsm
);
1415 ptlrpc_req_finished((struct ptlrpc_request
*)oit
.it_request
);
1419 int ll_lov_getstripe_ea_info(struct inode
*inode
, const char *filename
,
1420 struct lov_mds_md
**lmmp
, int *lmm_size
,
1421 struct ptlrpc_request
**request
)
1423 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1424 struct mdt_body
*body
;
1425 struct lov_mds_md
*lmm
= NULL
;
1426 struct ptlrpc_request
*req
= NULL
;
1427 struct md_op_data
*op_data
;
1430 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
1434 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, filename
,
1435 strlen(filename
), lmmsize
,
1436 LUSTRE_OPC_ANY
, NULL
);
1437 if (IS_ERR(op_data
))
1438 return PTR_ERR(op_data
);
1440 op_data
->op_valid
= OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
;
1441 rc
= md_getattr_name(sbi
->ll_md_exp
, op_data
, &req
);
1442 ll_finish_md_op_data(op_data
);
1444 CDEBUG(D_INFO
, "md_getattr_name failed on %s: rc %d\n",
1449 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1451 lmmsize
= body
->eadatasize
;
1453 if (!(body
->valid
& (OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
)) ||
1459 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_MDT_MD
, lmmsize
);
1461 if ((lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V1
)) &&
1462 (lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V3
))) {
1468 * This is coming from the MDS, so is probably in
1469 * little endian. We convert it to host endian before
1470 * passing it to userspace.
1472 if (cpu_to_le32(LOV_MAGIC
) != LOV_MAGIC
) {
1475 stripe_count
= le16_to_cpu(lmm
->lmm_stripe_count
);
1476 if (le32_to_cpu(lmm
->lmm_pattern
) & LOV_PATTERN_F_RELEASED
)
1479 /* if function called for directory - we should
1480 * avoid swab not existent lsm objects
1482 if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V1
)) {
1483 lustre_swab_lov_user_md_v1((struct lov_user_md_v1
*)lmm
);
1484 if (S_ISREG(body
->mode
))
1485 lustre_swab_lov_user_md_objects(
1486 ((struct lov_user_md_v1
*)lmm
)->lmm_objects
,
1488 } else if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V3
)) {
1489 lustre_swab_lov_user_md_v3((struct lov_user_md_v3
*)lmm
);
1490 if (S_ISREG(body
->mode
))
1491 lustre_swab_lov_user_md_objects(
1492 ((struct lov_user_md_v3
*)lmm
)->lmm_objects
,
1499 *lmm_size
= lmmsize
;
1504 static int ll_lov_setea(struct inode
*inode
, struct file
*file
,
1507 __u64 flags
= MDS_OPEN_HAS_OBJS
| FMODE_WRITE
;
1508 struct lov_user_md
*lump
;
1509 int lum_size
= sizeof(struct lov_user_md
) +
1510 sizeof(struct lov_user_ost_data
);
1513 if (!capable(CFS_CAP_SYS_ADMIN
))
1516 lump
= libcfs_kvzalloc(lum_size
, GFP_NOFS
);
1520 if (copy_from_user(lump
, (struct lov_user_md __user
*)arg
, lum_size
)) {
1525 rc
= ll_lov_setstripe_ea_info(inode
, file
->f_path
.dentry
, flags
, lump
,
1527 cl_lov_delay_create_clear(&file
->f_flags
);
1533 static int ll_lov_setstripe(struct inode
*inode
, struct file
*file
,
1536 struct lov_user_md_v3 lumv3
;
1537 struct lov_user_md_v1
*lumv1
= (struct lov_user_md_v1
*)&lumv3
;
1538 struct lov_user_md_v1 __user
*lumv1p
= (void __user
*)arg
;
1539 struct lov_user_md_v3 __user
*lumv3p
= (void __user
*)arg
;
1541 __u64 flags
= FMODE_WRITE
;
1543 /* first try with v1 which is smaller than v3 */
1544 lum_size
= sizeof(struct lov_user_md_v1
);
1545 if (copy_from_user(lumv1
, lumv1p
, lum_size
))
1548 if (lumv1
->lmm_magic
== LOV_USER_MAGIC_V3
) {
1549 lum_size
= sizeof(struct lov_user_md_v3
);
1550 if (copy_from_user(&lumv3
, lumv3p
, lum_size
))
1554 rc
= ll_lov_setstripe_ea_info(inode
, file
->f_path
.dentry
, flags
, lumv1
,
1556 cl_lov_delay_create_clear(&file
->f_flags
);
1558 struct lov_stripe_md
*lsm
;
1561 put_user(0, &lumv1p
->lmm_stripe_count
);
1563 ll_layout_refresh(inode
, &gen
);
1564 lsm
= ccc_inode_lsm_get(inode
);
1565 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
),
1566 0, lsm
, (void __user
*)arg
);
1567 ccc_inode_lsm_put(inode
, lsm
);
1572 static int ll_lov_getstripe(struct inode
*inode
, unsigned long arg
)
1574 struct lov_stripe_md
*lsm
;
1577 lsm
= ccc_inode_lsm_get(inode
);
1579 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
), 0,
1580 lsm
, (void __user
*)arg
);
1581 ccc_inode_lsm_put(inode
, lsm
);
1586 ll_get_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1588 struct ll_inode_info
*lli
= ll_i2info(inode
);
1589 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1590 struct ll_grouplock grouplock
;
1594 CWARN("group id for group lock must not be 0\n");
1598 if (ll_file_nolock(file
))
1601 spin_lock(&lli
->lli_lock
);
1602 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1603 CWARN("group lock already existed with gid %lu\n",
1604 fd
->fd_grouplock
.lg_gid
);
1605 spin_unlock(&lli
->lli_lock
);
1608 LASSERT(!fd
->fd_grouplock
.lg_lock
);
1609 spin_unlock(&lli
->lli_lock
);
1611 rc
= cl_get_grouplock(ll_i2info(inode
)->lli_clob
,
1612 arg
, (file
->f_flags
& O_NONBLOCK
), &grouplock
);
1616 spin_lock(&lli
->lli_lock
);
1617 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1618 spin_unlock(&lli
->lli_lock
);
1619 CERROR("another thread just won the race\n");
1620 cl_put_grouplock(&grouplock
);
1624 fd
->fd_flags
|= LL_FILE_GROUP_LOCKED
;
1625 fd
->fd_grouplock
= grouplock
;
1626 spin_unlock(&lli
->lli_lock
);
1628 CDEBUG(D_INFO
, "group lock %lu obtained\n", arg
);
1632 static int ll_put_grouplock(struct inode
*inode
, struct file
*file
,
1635 struct ll_inode_info
*lli
= ll_i2info(inode
);
1636 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1637 struct ll_grouplock grouplock
;
1639 spin_lock(&lli
->lli_lock
);
1640 if (!(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1641 spin_unlock(&lli
->lli_lock
);
1642 CWARN("no group lock held\n");
1645 LASSERT(fd
->fd_grouplock
.lg_lock
);
1647 if (fd
->fd_grouplock
.lg_gid
!= arg
) {
1648 CWARN("group lock %lu doesn't match current id %lu\n",
1649 arg
, fd
->fd_grouplock
.lg_gid
);
1650 spin_unlock(&lli
->lli_lock
);
1654 grouplock
= fd
->fd_grouplock
;
1655 memset(&fd
->fd_grouplock
, 0, sizeof(fd
->fd_grouplock
));
1656 fd
->fd_flags
&= ~LL_FILE_GROUP_LOCKED
;
1657 spin_unlock(&lli
->lli_lock
);
1659 cl_put_grouplock(&grouplock
);
1660 CDEBUG(D_INFO
, "group lock %lu released\n", arg
);
1665 * Close inode open handle
1667 * \param inode [in] inode in question
1668 * \param it [in,out] intent which contains open info and result
1671 * \retval <0 failure
1673 int ll_release_openhandle(struct inode
*inode
, struct lookup_intent
*it
)
1675 struct obd_client_handle
*och
;
1680 /* Root ? Do nothing. */
1681 if (is_root_inode(inode
))
1684 /* No open handle to close? Move away */
1685 if (!it_disposition(it
, DISP_OPEN_OPEN
))
1688 LASSERT(it_open_error(DISP_OPEN_OPEN
, it
) == 0);
1690 och
= kzalloc(sizeof(*och
), GFP_NOFS
);
1696 ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
1698 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
1701 /* this one is in place of ll_file_open */
1702 if (it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
1703 ptlrpc_req_finished(it
->it_request
);
1704 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
1710 * Get size for inode for which FIEMAP mapping is requested.
1711 * Make the FIEMAP get_info call and returns the result.
1713 static int ll_do_fiemap(struct inode
*inode
, struct ll_user_fiemap
*fiemap
,
1716 struct obd_export
*exp
= ll_i2dtexp(inode
);
1717 struct lov_stripe_md
*lsm
= NULL
;
1718 struct ll_fiemap_info_key fm_key
= { .name
= KEY_FIEMAP
, };
1719 __u32 vallen
= num_bytes
;
1722 /* Checks for fiemap flags */
1723 if (fiemap
->fm_flags
& ~LUSTRE_FIEMAP_FLAGS_COMPAT
) {
1724 fiemap
->fm_flags
&= ~LUSTRE_FIEMAP_FLAGS_COMPAT
;
1728 /* Check for FIEMAP_FLAG_SYNC */
1729 if (fiemap
->fm_flags
& FIEMAP_FLAG_SYNC
) {
1730 rc
= filemap_fdatawrite(inode
->i_mapping
);
1735 lsm
= ccc_inode_lsm_get(inode
);
1739 /* If the stripe_count > 1 and the application does not understand
1740 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1742 if (lsm
->lsm_stripe_count
> 1 &&
1743 !(fiemap
->fm_flags
& FIEMAP_FLAG_DEVICE_ORDER
)) {
1748 fm_key
.oa
.o_oi
= lsm
->lsm_oi
;
1749 fm_key
.oa
.o_valid
= OBD_MD_FLID
| OBD_MD_FLGROUP
;
1751 if (i_size_read(inode
) == 0) {
1752 rc
= ll_glimpse_size(inode
);
1757 obdo_from_inode(&fm_key
.oa
, inode
, OBD_MD_FLSIZE
);
1758 obdo_set_parent_fid(&fm_key
.oa
, &ll_i2info(inode
)->lli_fid
);
1759 /* If filesize is 0, then there would be no objects for mapping */
1760 if (fm_key
.oa
.o_size
== 0) {
1761 fiemap
->fm_mapped_extents
= 0;
1766 memcpy(&fm_key
.fiemap
, fiemap
, sizeof(*fiemap
));
1768 rc
= obd_get_info(NULL
, exp
, sizeof(fm_key
), &fm_key
, &vallen
,
1771 CERROR("obd_get_info failed: rc = %d\n", rc
);
1774 ccc_inode_lsm_put(inode
, lsm
);
1778 int ll_fid2path(struct inode
*inode
, void __user
*arg
)
1780 struct obd_export
*exp
= ll_i2mdexp(inode
);
1781 const struct getinfo_fid2path __user
*gfin
= arg
;
1782 struct getinfo_fid2path
*gfout
;
1787 if (!capable(CFS_CAP_DAC_READ_SEARCH
) &&
1788 !(ll_i2sbi(inode
)->ll_flags
& LL_SBI_USER_FID2PATH
))
1791 /* Only need to get the buflen */
1792 if (get_user(pathlen
, &gfin
->gf_pathlen
))
1795 if (pathlen
> PATH_MAX
)
1798 outsize
= sizeof(*gfout
) + pathlen
;
1800 gfout
= kzalloc(outsize
, GFP_NOFS
);
1804 if (copy_from_user(gfout
, arg
, sizeof(*gfout
))) {
1809 /* Call mdc_iocontrol */
1810 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, exp
, outsize
, gfout
, NULL
);
1814 if (copy_to_user(arg
, gfout
, outsize
))
1822 static int ll_ioctl_fiemap(struct inode
*inode
, unsigned long arg
)
1824 struct ll_user_fiemap
*fiemap_s
;
1825 size_t num_bytes
, ret_bytes
;
1826 unsigned int extent_count
;
1829 /* Get the extent count so we can calculate the size of
1830 * required fiemap buffer
1832 if (get_user(extent_count
,
1833 &((struct ll_user_fiemap __user
*)arg
)->fm_extent_count
))
1837 (SIZE_MAX
- sizeof(*fiemap_s
)) / sizeof(struct ll_fiemap_extent
))
1839 num_bytes
= sizeof(*fiemap_s
) + (extent_count
*
1840 sizeof(struct ll_fiemap_extent
));
1842 fiemap_s
= libcfs_kvzalloc(num_bytes
, GFP_NOFS
);
1846 /* get the fiemap value */
1847 if (copy_from_user(fiemap_s
, (struct ll_user_fiemap __user
*)arg
,
1848 sizeof(*fiemap_s
))) {
1853 /* If fm_extent_count is non-zero, read the first extent since
1854 * it is used to calculate end_offset and device from previous
1858 if (copy_from_user(&fiemap_s
->fm_extents
[0],
1859 (char __user
*)arg
+ sizeof(*fiemap_s
),
1860 sizeof(struct ll_fiemap_extent
))) {
1866 rc
= ll_do_fiemap(inode
, fiemap_s
, num_bytes
);
1870 ret_bytes
= sizeof(struct ll_user_fiemap
);
1872 if (extent_count
!= 0)
1873 ret_bytes
+= (fiemap_s
->fm_mapped_extents
*
1874 sizeof(struct ll_fiemap_extent
));
1876 if (copy_to_user((void __user
*)arg
, fiemap_s
, ret_bytes
))
1885 * Read the data_version for inode.
1887 * This value is computed using stripe object version on OST.
1888 * Version is computed using server side locking.
1890 * @param sync if do sync on the OST side;
1892 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1893 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1895 int ll_data_version(struct inode
*inode
, __u64
*data_version
, int flags
)
1897 struct lov_stripe_md
*lsm
= NULL
;
1898 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1899 struct obdo
*obdo
= NULL
;
1902 /* If no stripe, we consider version is 0. */
1903 lsm
= ccc_inode_lsm_get(inode
);
1904 if (!lsm_has_objects(lsm
)) {
1906 CDEBUG(D_INODE
, "No object for inode\n");
1911 obdo
= kzalloc(sizeof(*obdo
), GFP_NOFS
);
1917 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, obdo
, 0, flags
);
1919 if (!(obdo
->o_valid
& OBD_MD_FLDATAVERSION
))
1922 *data_version
= obdo
->o_data_version
;
1927 ccc_inode_lsm_put(inode
, lsm
);
1932 * Trigger a HSM release request for the provided inode.
1934 int ll_hsm_release(struct inode
*inode
)
1936 struct cl_env_nest nest
;
1938 struct obd_client_handle
*och
= NULL
;
1939 __u64 data_version
= 0;
1942 CDEBUG(D_INODE
, "%s: Releasing file "DFID
".\n",
1943 ll_get_fsname(inode
->i_sb
, NULL
, 0),
1944 PFID(&ll_i2info(inode
)->lli_fid
));
1946 och
= ll_lease_open(inode
, NULL
, FMODE_WRITE
, MDS_OPEN_RELEASE
);
1952 /* Grab latest data_version and [am]time values */
1953 rc
= ll_data_version(inode
, &data_version
, LL_DV_WR_FLUSH
);
1957 env
= cl_env_nested_get(&nest
);
1963 ll_merge_attr(env
, inode
);
1964 cl_env_nested_put(&nest
, env
);
1966 /* Release the file.
1967 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1968 * we still need it to pack l_remote_handle to MDT.
1970 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
1975 if (och
&& !IS_ERR(och
)) /* close the file */
1976 ll_lease_close(och
, inode
, NULL
);
1981 struct ll_swap_stack
{
1982 struct iattr ia1
, ia2
;
1984 struct inode
*inode1
, *inode2
;
1985 bool check_dv1
, check_dv2
;
1988 static int ll_swap_layouts(struct file
*file1
, struct file
*file2
,
1989 struct lustre_swap_layouts
*lsl
)
1991 struct mdc_swap_layouts msl
;
1992 struct md_op_data
*op_data
;
1995 struct ll_swap_stack
*llss
= NULL
;
1998 llss
= kzalloc(sizeof(*llss
), GFP_NOFS
);
2002 llss
->inode1
= file_inode(file1
);
2003 llss
->inode2
= file_inode(file2
);
2005 if (!S_ISREG(llss
->inode2
->i_mode
)) {
2010 if (inode_permission(llss
->inode1
, MAY_WRITE
) ||
2011 inode_permission(llss
->inode2
, MAY_WRITE
)) {
2016 if (llss
->inode2
->i_sb
!= llss
->inode1
->i_sb
) {
2021 /* we use 2 bool because it is easier to swap than 2 bits */
2022 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV1
)
2023 llss
->check_dv1
= true;
2025 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV2
)
2026 llss
->check_dv2
= true;
2028 /* we cannot use lsl->sl_dvX directly because we may swap them */
2029 llss
->dv1
= lsl
->sl_dv1
;
2030 llss
->dv2
= lsl
->sl_dv2
;
2032 rc
= lu_fid_cmp(ll_inode2fid(llss
->inode1
), ll_inode2fid(llss
->inode2
));
2033 if (rc
== 0) /* same file, done! */ {
2038 if (rc
< 0) { /* sequentialize it */
2039 swap(llss
->inode1
, llss
->inode2
);
2041 swap(llss
->dv1
, llss
->dv2
);
2042 swap(llss
->check_dv1
, llss
->check_dv2
);
2046 if (gid
!= 0) { /* application asks to flush dirty cache */
2047 rc
= ll_get_grouplock(llss
->inode1
, file1
, gid
);
2051 rc
= ll_get_grouplock(llss
->inode2
, file2
, gid
);
2053 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2058 /* to be able to restore mtime and atime after swap
2059 * we need to first save them
2062 (SWAP_LAYOUTS_KEEP_MTIME
| SWAP_LAYOUTS_KEEP_ATIME
)) {
2063 llss
->ia1
.ia_mtime
= llss
->inode1
->i_mtime
;
2064 llss
->ia1
.ia_atime
= llss
->inode1
->i_atime
;
2065 llss
->ia1
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2066 llss
->ia2
.ia_mtime
= llss
->inode2
->i_mtime
;
2067 llss
->ia2
.ia_atime
= llss
->inode2
->i_atime
;
2068 llss
->ia2
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2071 /* ultimate check, before swapping the layouts we check if
2072 * dataversion has changed (if requested)
2074 if (llss
->check_dv1
) {
2075 rc
= ll_data_version(llss
->inode1
, &dv
, 0);
2078 if (dv
!= llss
->dv1
) {
2084 if (llss
->check_dv2
) {
2085 rc
= ll_data_version(llss
->inode2
, &dv
, 0);
2088 if (dv
!= llss
->dv2
) {
2094 /* struct md_op_data is used to send the swap args to the mdt
2095 * only flags is missing, so we use struct mdc_swap_layouts
2096 * through the md_op_data->op_data
2098 /* flags from user space have to be converted before they are send to
2099 * server, no flag is sent today, they are only used on the client
2103 op_data
= ll_prep_md_op_data(NULL
, llss
->inode1
, llss
->inode2
, NULL
, 0,
2104 0, LUSTRE_OPC_ANY
, &msl
);
2105 if (IS_ERR(op_data
)) {
2106 rc
= PTR_ERR(op_data
);
2110 rc
= obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS
, ll_i2mdexp(llss
->inode1
),
2111 sizeof(*op_data
), op_data
, NULL
);
2112 ll_finish_md_op_data(op_data
);
2116 ll_put_grouplock(llss
->inode2
, file2
, gid
);
2117 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2120 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2124 /* clear useless flags */
2125 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_MTIME
)) {
2126 llss
->ia1
.ia_valid
&= ~ATTR_MTIME
;
2127 llss
->ia2
.ia_valid
&= ~ATTR_MTIME
;
2130 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_ATIME
)) {
2131 llss
->ia1
.ia_valid
&= ~ATTR_ATIME
;
2132 llss
->ia2
.ia_valid
&= ~ATTR_ATIME
;
2135 /* update time if requested */
2137 if (llss
->ia2
.ia_valid
!= 0) {
2138 inode_lock(llss
->inode1
);
2139 rc
= ll_setattr(file1
->f_path
.dentry
, &llss
->ia2
);
2140 inode_unlock(llss
->inode1
);
2143 if (llss
->ia1
.ia_valid
!= 0) {
2146 inode_lock(llss
->inode2
);
2147 rc1
= ll_setattr(file2
->f_path
.dentry
, &llss
->ia1
);
2148 inode_unlock(llss
->inode2
);
2159 static int ll_hsm_state_set(struct inode
*inode
, struct hsm_state_set
*hss
)
2161 struct md_op_data
*op_data
;
2164 /* Detect out-of range masks */
2165 if ((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_FLAGS_MASK
)
2168 /* Non-root users are forbidden to set or clear flags which are
2169 * NOT defined in HSM_USER_MASK.
2171 if (((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_USER_MASK
) &&
2172 !capable(CFS_CAP_SYS_ADMIN
))
2175 /* Detect out-of range archive id */
2176 if ((hss
->hss_valid
& HSS_ARCHIVE_ID
) &&
2177 (hss
->hss_archive_id
> LL_HSM_MAX_ARCHIVE
))
2180 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2181 LUSTRE_OPC_ANY
, hss
);
2182 if (IS_ERR(op_data
))
2183 return PTR_ERR(op_data
);
2185 rc
= obd_iocontrol(LL_IOC_HSM_STATE_SET
, ll_i2mdexp(inode
),
2186 sizeof(*op_data
), op_data
, NULL
);
2188 ll_finish_md_op_data(op_data
);
2193 static int ll_hsm_import(struct inode
*inode
, struct file
*file
,
2194 struct hsm_user_import
*hui
)
2196 struct hsm_state_set
*hss
= NULL
;
2197 struct iattr
*attr
= NULL
;
2200 if (!S_ISREG(inode
->i_mode
))
2204 hss
= kzalloc(sizeof(*hss
), GFP_NOFS
);
2208 hss
->hss_valid
= HSS_SETMASK
| HSS_ARCHIVE_ID
;
2209 hss
->hss_archive_id
= hui
->hui_archive_id
;
2210 hss
->hss_setmask
= HS_ARCHIVED
| HS_EXISTS
| HS_RELEASED
;
2211 rc
= ll_hsm_state_set(inode
, hss
);
2215 attr
= kzalloc(sizeof(*attr
), GFP_NOFS
);
2221 attr
->ia_mode
= hui
->hui_mode
& (S_IRWXU
| S_IRWXG
| S_IRWXO
);
2222 attr
->ia_mode
|= S_IFREG
;
2223 attr
->ia_uid
= make_kuid(&init_user_ns
, hui
->hui_uid
);
2224 attr
->ia_gid
= make_kgid(&init_user_ns
, hui
->hui_gid
);
2225 attr
->ia_size
= hui
->hui_size
;
2226 attr
->ia_mtime
.tv_sec
= hui
->hui_mtime
;
2227 attr
->ia_mtime
.tv_nsec
= hui
->hui_mtime_ns
;
2228 attr
->ia_atime
.tv_sec
= hui
->hui_atime
;
2229 attr
->ia_atime
.tv_nsec
= hui
->hui_atime_ns
;
2231 attr
->ia_valid
= ATTR_SIZE
| ATTR_MODE
| ATTR_FORCE
|
2232 ATTR_UID
| ATTR_GID
|
2233 ATTR_MTIME
| ATTR_MTIME_SET
|
2234 ATTR_ATIME
| ATTR_ATIME_SET
;
2238 rc
= ll_setattr_raw(file
->f_path
.dentry
, attr
, true);
2242 inode_unlock(inode
);
2251 ll_file_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
2253 struct inode
*inode
= file_inode(file
);
2254 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2257 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p),cmd=%x\n",
2258 PFID(ll_inode2fid(inode
)), inode
, cmd
);
2259 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_IOCTL
, 1);
2261 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2262 if (_IOC_TYPE(cmd
) == 'T' || _IOC_TYPE(cmd
) == 't') /* tty ioctls */
2266 case LL_IOC_GETFLAGS
:
2267 /* Get the current value of the file flags */
2268 return put_user(fd
->fd_flags
, (int __user
*)arg
);
2269 case LL_IOC_SETFLAGS
:
2270 case LL_IOC_CLRFLAGS
:
2271 /* Set or clear specific file flags */
2272 /* XXX This probably needs checks to ensure the flags are
2273 * not abused, and to handle any flag side effects.
2275 if (get_user(flags
, (int __user
*)arg
))
2278 if (cmd
== LL_IOC_SETFLAGS
) {
2279 if ((flags
& LL_FILE_IGNORE_LOCK
) &&
2280 !(file
->f_flags
& O_DIRECT
)) {
2281 CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
2286 fd
->fd_flags
|= flags
;
2288 fd
->fd_flags
&= ~flags
;
2291 case LL_IOC_LOV_SETSTRIPE
:
2292 return ll_lov_setstripe(inode
, file
, arg
);
2293 case LL_IOC_LOV_SETEA
:
2294 return ll_lov_setea(inode
, file
, arg
);
2295 case LL_IOC_LOV_SWAP_LAYOUTS
: {
2297 struct lustre_swap_layouts lsl
;
2299 if (copy_from_user(&lsl
, (char __user
*)arg
,
2300 sizeof(struct lustre_swap_layouts
)))
2303 if ((file
->f_flags
& O_ACCMODE
) == 0) /* O_RDONLY */
2306 file2
= fget(lsl
.sl_fd
);
2311 if ((file2
->f_flags
& O_ACCMODE
) != 0) /* O_WRONLY or O_RDWR */
2312 rc
= ll_swap_layouts(file
, file2
, &lsl
);
2316 case LL_IOC_LOV_GETSTRIPE
:
2317 return ll_lov_getstripe(inode
, arg
);
2318 case LL_IOC_RECREATE_OBJ
:
2319 return ll_lov_recreate_obj(inode
, arg
);
2320 case LL_IOC_RECREATE_FID
:
2321 return ll_lov_recreate_fid(inode
, arg
);
2322 case FSFILT_IOC_FIEMAP
:
2323 return ll_ioctl_fiemap(inode
, arg
);
2324 case FSFILT_IOC_GETFLAGS
:
2325 case FSFILT_IOC_SETFLAGS
:
2326 return ll_iocontrol(inode
, file
, cmd
, arg
);
2327 case FSFILT_IOC_GETVERSION_OLD
:
2328 case FSFILT_IOC_GETVERSION
:
2329 return put_user(inode
->i_generation
, (int __user
*)arg
);
2330 case LL_IOC_GROUP_LOCK
:
2331 return ll_get_grouplock(inode
, file
, arg
);
2332 case LL_IOC_GROUP_UNLOCK
:
2333 return ll_put_grouplock(inode
, file
, arg
);
2334 case IOC_OBD_STATFS
:
2335 return ll_obd_statfs(inode
, (void __user
*)arg
);
2337 /* We need to special case any other ioctls we want to handle,
2338 * to send them to the MDS/OST as appropriate and to properly
2339 * network encode the arg field.
2340 case FSFILT_IOC_SETVERSION_OLD:
2341 case FSFILT_IOC_SETVERSION:
2343 case LL_IOC_FLUSHCTX
:
2344 return ll_flush_ctx(inode
);
2345 case LL_IOC_PATH2FID
: {
2346 if (copy_to_user((void __user
*)arg
, ll_inode2fid(inode
),
2347 sizeof(struct lu_fid
)))
2352 case OBD_IOC_FID2PATH
:
2353 return ll_fid2path(inode
, (void __user
*)arg
);
2354 case LL_IOC_DATA_VERSION
: {
2355 struct ioc_data_version idv
;
2358 if (copy_from_user(&idv
, (char __user
*)arg
, sizeof(idv
)))
2361 idv
.idv_flags
&= LL_DV_RD_FLUSH
| LL_DV_WR_FLUSH
;
2362 rc
= ll_data_version(inode
, &idv
.idv_version
, idv
.idv_flags
);
2363 if (rc
== 0 && copy_to_user((char __user
*)arg
, &idv
,
2370 case LL_IOC_GET_MDTIDX
: {
2373 mdtidx
= ll_get_mdt_idx(inode
);
2377 if (put_user(mdtidx
, (int __user
*)arg
))
2382 case OBD_IOC_GETDTNAME
:
2383 case OBD_IOC_GETMDNAME
:
2384 return ll_get_obd_name(inode
, cmd
, arg
);
2385 case LL_IOC_HSM_STATE_GET
: {
2386 struct md_op_data
*op_data
;
2387 struct hsm_user_state
*hus
;
2390 hus
= kzalloc(sizeof(*hus
), GFP_NOFS
);
2394 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2395 LUSTRE_OPC_ANY
, hus
);
2396 if (IS_ERR(op_data
)) {
2398 return PTR_ERR(op_data
);
2401 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2404 if (copy_to_user((void __user
*)arg
, hus
, sizeof(*hus
)))
2407 ll_finish_md_op_data(op_data
);
2411 case LL_IOC_HSM_STATE_SET
: {
2412 struct hsm_state_set
*hss
;
2415 hss
= memdup_user((char __user
*)arg
, sizeof(*hss
));
2417 return PTR_ERR(hss
);
2419 rc
= ll_hsm_state_set(inode
, hss
);
2424 case LL_IOC_HSM_ACTION
: {
2425 struct md_op_data
*op_data
;
2426 struct hsm_current_action
*hca
;
2429 hca
= kzalloc(sizeof(*hca
), GFP_NOFS
);
2433 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2434 LUSTRE_OPC_ANY
, hca
);
2435 if (IS_ERR(op_data
)) {
2437 return PTR_ERR(op_data
);
2440 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2443 if (copy_to_user((char __user
*)arg
, hca
, sizeof(*hca
)))
2446 ll_finish_md_op_data(op_data
);
2450 case LL_IOC_SET_LEASE
: {
2451 struct ll_inode_info
*lli
= ll_i2info(inode
);
2452 struct obd_client_handle
*och
= NULL
;
2458 if (!(file
->f_mode
& FMODE_WRITE
))
2463 if (!(file
->f_mode
& FMODE_READ
))
2468 mutex_lock(&lli
->lli_och_mutex
);
2469 if (fd
->fd_lease_och
) {
2470 och
= fd
->fd_lease_och
;
2471 fd
->fd_lease_och
= NULL
;
2473 mutex_unlock(&lli
->lli_och_mutex
);
2476 mode
= och
->och_flags
&
2477 (FMODE_READ
|FMODE_WRITE
);
2478 rc
= ll_lease_close(och
, inode
, &lease_broken
);
2479 if (rc
== 0 && lease_broken
)
2485 /* return the type of lease or error */
2486 return rc
< 0 ? rc
: (int)mode
;
2491 CDEBUG(D_INODE
, "Set lease with mode %d\n", mode
);
2493 /* apply for lease */
2494 och
= ll_lease_open(inode
, file
, mode
, 0);
2496 return PTR_ERR(och
);
2499 mutex_lock(&lli
->lli_och_mutex
);
2500 if (!fd
->fd_lease_och
) {
2501 fd
->fd_lease_och
= och
;
2504 mutex_unlock(&lli
->lli_och_mutex
);
2506 /* impossible now that only excl is supported for now */
2507 ll_lease_close(och
, inode
, &lease_broken
);
2512 case LL_IOC_GET_LEASE
: {
2513 struct ll_inode_info
*lli
= ll_i2info(inode
);
2514 struct ldlm_lock
*lock
= NULL
;
2517 mutex_lock(&lli
->lli_och_mutex
);
2518 if (fd
->fd_lease_och
) {
2519 struct obd_client_handle
*och
= fd
->fd_lease_och
;
2521 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
2523 lock_res_and_lock(lock
);
2524 if (!ldlm_is_cancel(lock
))
2525 rc
= och
->och_flags
&
2526 (FMODE_READ
| FMODE_WRITE
);
2527 unlock_res_and_lock(lock
);
2528 LDLM_LOCK_PUT(lock
);
2531 mutex_unlock(&lli
->lli_och_mutex
);
2534 case LL_IOC_HSM_IMPORT
: {
2535 struct hsm_user_import
*hui
;
2537 hui
= memdup_user((void __user
*)arg
, sizeof(*hui
));
2539 return PTR_ERR(hui
);
2541 rc
= ll_hsm_import(inode
, file
, hui
);
2549 if (ll_iocontrol_call(inode
, file
, cmd
, arg
, &err
) ==
2553 return obd_iocontrol(cmd
, ll_i2dtexp(inode
), 0, NULL
,
2554 (void __user
*)arg
);
2559 static loff_t
ll_file_seek(struct file
*file
, loff_t offset
, int origin
)
2561 struct inode
*inode
= file_inode(file
);
2562 loff_t retval
, eof
= 0;
2564 retval
= offset
+ ((origin
== SEEK_END
) ? i_size_read(inode
) :
2565 (origin
== SEEK_CUR
) ? file
->f_pos
: 0);
2566 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), to=%llu=%#llx(%d)\n",
2567 PFID(ll_inode2fid(inode
)), inode
, retval
, retval
, origin
);
2568 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_LLSEEK
, 1);
2570 if (origin
== SEEK_END
|| origin
== SEEK_HOLE
|| origin
== SEEK_DATA
) {
2571 retval
= ll_glimpse_size(inode
);
2574 eof
= i_size_read(inode
);
2577 retval
= generic_file_llseek_size(file
, offset
, origin
,
2578 ll_file_maxbytes(inode
), eof
);
2582 static int ll_flush(struct file
*file
, fl_owner_t id
)
2584 struct inode
*inode
= file_inode(file
);
2585 struct ll_inode_info
*lli
= ll_i2info(inode
);
2586 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2589 LASSERT(!S_ISDIR(inode
->i_mode
));
2591 /* catch async errors that were recorded back when async writeback
2592 * failed for pages in this mapping.
2594 rc
= lli
->lli_async_rc
;
2595 lli
->lli_async_rc
= 0;
2596 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2600 /* The application has been told about write failure already.
2601 * Do not report failure again.
2603 if (fd
->fd_write_failed
)
2605 return rc
? -EIO
: 0;
2609 * Called to make sure a portion of file has been written out.
2610 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2612 * Return how many pages have been written.
2614 int cl_sync_file_range(struct inode
*inode
, loff_t start
, loff_t end
,
2615 enum cl_fsync_mode mode
, int ignore_layout
)
2617 struct cl_env_nest nest
;
2620 struct cl_fsync_io
*fio
;
2623 if (mode
!= CL_FSYNC_NONE
&& mode
!= CL_FSYNC_LOCAL
&&
2624 mode
!= CL_FSYNC_DISCARD
&& mode
!= CL_FSYNC_ALL
)
2627 env
= cl_env_nested_get(&nest
);
2629 return PTR_ERR(env
);
2631 io
= vvp_env_thread_io(env
);
2632 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
2633 io
->ci_ignore_layout
= ignore_layout
;
2635 /* initialize parameters for sync */
2636 fio
= &io
->u
.ci_fsync
;
2637 fio
->fi_start
= start
;
2639 fio
->fi_fid
= ll_inode2fid(inode
);
2640 fio
->fi_mode
= mode
;
2641 fio
->fi_nr_written
= 0;
2643 if (cl_io_init(env
, io
, CIT_FSYNC
, io
->ci_obj
) == 0)
2644 result
= cl_io_loop(env
, io
);
2646 result
= io
->ci_result
;
2648 result
= fio
->fi_nr_written
;
2649 cl_io_fini(env
, io
);
2650 cl_env_nested_put(&nest
, env
);
/*
 * NOTE(review): fragmented extraction — embedded line numbers are
 * non-contiguous; the dropped lines include local declarations (rc, err),
 * the inode_lock() that pairs with the inode_unlock() below, and the final
 * return.  Code preserved byte-for-byte; only comments added.
 *
 * ll_fsync() — VFS ->fsync for Lustre files: flush the page cache range,
 * pick up previously-recorded async writeback errors from lli_async_rc /
 * the lov layer, issue an MDS sync RPC (md_sync), and for regular files
 * sync OST data via cl_sync_file_range(CL_FSYNC_ALL), updating
 * fd->fd_write_failed accordingly.
 */
2655 int ll_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
2657 struct inode
*inode
= file_inode(file
);
2658 struct ll_inode_info
*lli
= ll_i2info(inode
);
2659 struct ptlrpc_request
*req
;
2662 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p)\n",
2663 PFID(ll_inode2fid(inode
)), inode
);
2664 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FSYNC
, 1);
2666 rc
= filemap_write_and_wait_range(inode
->i_mapping
, start
, end
);
2669 /* catch async errors that were recorded back when async writeback
2670 * failed for pages in this mapping.
/* non-directories: harvest and clear the stashed async writeback rc */
2672 if (!S_ISDIR(inode
->i_mode
)) {
2673 err
= lli
->lli_async_rc
;
2674 lli
->lli_async_rc
= 0;
2677 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
/* sync metadata with the MDS */
2682 err
= md_sync(ll_i2sbi(inode
)->ll_md_exp
, ll_inode2fid(inode
), &req
);
2686 ptlrpc_req_finished(req
);
2688 if (S_ISREG(inode
->i_mode
)) {
2689 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2691 err
= cl_sync_file_range(inode
, start
, end
, CL_FSYNC_ALL
, 0);
/* remember data-sync failure so a later flush can report -EIO */
2692 if (rc
== 0 && err
< 0)
2695 fd
->fd_write_failed
= true;
2697 fd
->fd_write_failed
= false;
2700 inode_unlock(inode
);
/*
 * NOTE(review): fragmented extraction — embedded line numbers are
 * non-contiguous; the dropped lines include the return type / opening brace,
 * several locals (flags, rc, rc2), the case labels of the fl_type and cmd
 * switches, and the final return.  Code preserved byte-for-byte; only
 * comments added.
 *
 * ll_file_flock() — ->flock/->lock handler: translate a VFS struct
 * file_lock into an LDLM_FLOCK enqueue (einfo/flock policy below), send it
 * via md_enqueue(), then mirror the result into the local lock table with
 * locks_lock_file_wait(); on local failure the DLM lock is released by a
 * second LCK_NL enqueue.
 */
2705 ll_file_flock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2707 struct inode
*inode
= file_inode(file
);
2708 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2709 struct ldlm_enqueue_info einfo
= {
2710 .ei_type
= LDLM_FLOCK
,
2711 .ei_cb_cp
= ldlm_flock_completion_ast
,
2712 .ei_cbdata
= file_lock
,
2714 struct md_op_data
*op_data
;
2715 struct lustre_handle lockh
= {0};
2716 ldlm_policy_data_t flock
= { {0} };
2721 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
" file_lock=%p\n",
2722 PFID(ll_inode2fid(inode
)), file_lock
);
2724 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FLOCK
, 1);
/* only BSD flock() and POSIX fcntl() locks are supported here */
2726 if (file_lock
->fl_flags
& FL_FLOCK
)
2727 LASSERT((cmd
== F_SETLKW
) || (cmd
== F_SETLK
));
2728 else if (!(file_lock
->fl_flags
& FL_POSIX
))
/* fill the LDLM flock policy from the VFS lock description */
2731 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_owner
;
2732 flock
.l_flock
.pid
= file_lock
->fl_pid
;
2733 flock
.l_flock
.start
= file_lock
->fl_start
;
2734 flock
.l_flock
.end
= file_lock
->fl_end
;
2736 /* Somewhat ugly workaround for svc lockd.
2737 * lockd installs custom fl_lmops->lm_compare_owner that checks
2738 * for the fl_owner to be the same (which it always is on local node
2739 * I guess between lockd processes) and then compares pid.
2740 * As such we assign pid to the owner field to make it all work,
2741 * conflict with normal locks is unlikely since pid space and
2742 * pointer space for current->files are not intersecting
2744 if (file_lock
->fl_lmops
&& file_lock
->fl_lmops
->lm_compare_owner
)
2745 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_pid
;
/* map the lock type to an LDLM mode (case labels dropped by extraction:
 * presumably F_RDLCK -> LCK_PR, F_UNLCK -> LCK_NL, F_WRLCK -> LCK_PW —
 * TODO confirm against the full source) */
2747 switch (file_lock
->fl_type
) {
2749 einfo
.ei_mode
= LCK_PR
;
2752 /* An unlock request may or may not have any relation to
2753 * existing locks so we may not be able to pass a lock handle
2754 * via a normal ldlm_lock_cancel() request. The request may even
2755 * unlock a byte range in the middle of an existing lock. In
2756 * order to process an unlock request we need all of the same
2757 * information that is given with a normal read or write record
2758 * lock request. To avoid creating another ldlm unlock (cancel)
2759 * message we'll treat a LCK_NL flock request as an unlock.
2761 einfo
.ei_mode
= LCK_NL
;
2764 einfo
.ei_mode
= LCK_PW
;
2767 CDEBUG(D_INFO
, "Unknown fcntl lock type: %d\n",
2768 file_lock
->fl_type
);
/* map cmd to enqueue flags (case labels dropped by extraction) */
2783 flags
= LDLM_FL_BLOCK_NOWAIT
;
2789 flags
= LDLM_FL_TEST_LOCK
;
2790 /* Save the old mode so that if the mode in the lock changes we
2791 * can decrement the appropriate reader or writer refcount.
2793 file_lock
->fl_type
= einfo
.ei_mode
;
2796 CERROR("unknown fcntl lock command: %d\n", cmd
);
2800 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2801 LUSTRE_OPC_ANY
, NULL
);
2802 if (IS_ERR(op_data
))
2803 return PTR_ERR(op_data
);
2805 CDEBUG(D_DLMTRACE
, "inode="DFID
", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
2806 PFID(ll_inode2fid(inode
)), flock
.l_flock
.pid
, flags
,
2807 einfo
.ei_mode
, flock
.l_flock
.start
, flock
.l_flock
.end
);
/* take (or test/release) the distributed flock on the MDS */
2809 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2810 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
/* mirror granted/unlock results into the kernel's local lock table */
2812 if ((rc
== 0 || file_lock
->fl_type
== F_UNLCK
) &&
2813 !(flags
& LDLM_FL_TEST_LOCK
))
2814 rc2
= locks_lock_file_wait(file
, file_lock
);
/* local bookkeeping failed: drop the server-side lock via LCK_NL */
2816 if (rc2
&& file_lock
->fl_type
!= F_UNLCK
) {
2817 einfo
.ei_mode
= LCK_NL
;
2818 md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2819 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
2823 ll_finish_md_op_data(op_data
);
/*
 * NOTE(review): only the signature survived the extraction; the return type
 * and body were dropped.  Per the comment at the ll_file_operations_noflock
 * table below ("-o noflock - to return ENOSYS on flock calls"), this is
 * presumably a stub that returns -ENOSYS — TODO confirm against full source.
 */
2829 ll_file_noflock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
/*
 * NOTE(review): fragmented extraction — non-contiguous embedded line numbers
 * mark dropped statements (locals fid/flags/i, loop continue, the *bits
 * update using the matched lock's inodebits, the return).  Code preserved
 * byte-for-byte; only comments added.
 *
 * ll_have_md_lock() — test each bit of *bits for a cached MDS inodebits
 * lock (md_lock_match with LDLM_FL_TEST_LOCK, so nothing is taken), clearing
 * the bits that are covered; LCK_MINMODE means "any of CR|CW|PR|PW".
 */
2835 * test if some locks matching bits and l_req_mode are acquired
2836 * - bits can be in different locks
2837 * - if found clear the common lock bits in *bits
2838 * - the bits not found, are kept in *bits
2840 * \param bits [IN] searched lock bits [IN]
2841 * \param l_req_mode [IN] searched lock mode
2842 * \retval boolean, true iff all bits are found
2844 int ll_have_md_lock(struct inode
*inode
, __u64
*bits
,
2845 enum ldlm_mode l_req_mode
)
2847 struct lustre_handle lockh
;
2848 ldlm_policy_data_t policy
;
2849 enum ldlm_mode mode
= (l_req_mode
== LCK_MINMODE
) ?
2850 (LCK_CR
|LCK_CW
|LCK_PR
|LCK_PW
) : l_req_mode
;
2858 fid
= &ll_i2info(inode
)->lli_fid
;
2859 CDEBUG(D_INFO
, "trying to match res "DFID
" mode %s\n", PFID(fid
),
2860 ldlm_lockname
[mode
]);
/* TEST_LOCK: probe only, no reference is taken on a match */
2862 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_CBPENDING
| LDLM_FL_TEST_LOCK
;
/* probe one inodebit at a time until all requested bits are resolved */
2863 for (i
= 0; i
<= MDS_INODELOCK_MAXSHIFT
&& *bits
!= 0; i
++) {
2864 policy
.l_inodebits
.bits
= *bits
& (1 << i
);
2865 if (policy
.l_inodebits
.bits
== 0)
2868 if (md_lock_match(ll_i2mdexp(inode
), flags
, fid
, LDLM_IBITS
,
2869 &policy
, mode
, &lockh
)) {
2870 struct ldlm_lock
*lock
;
2872 lock
= ldlm_handle2lock(&lockh
);
2875 ~(lock
->l_policy_data
.l_inodebits
.bits
);
2876 LDLM_LOCK_PUT(lock
);
2878 *bits
&= ~policy
.l_inodebits
.bits
;
/*
 * NOTE(review): fragmented extraction — dropped lines include the opening
 * brace, locals (fid, rc) and the final return.  Code preserved
 * byte-for-byte; only comments added.
 *
 * ll_take_md_lock() — try to match (and, unlike ll_have_md_lock, actually
 * reference via @lockh) a granted MDS inodebits lock covering @bits in one
 * of the modes in @mode; returns the matched mode (0 on miss, per callers
 * such as ll_layout_refresh below).
 */
2885 enum ldlm_mode
ll_take_md_lock(struct inode
*inode
, __u64 bits
,
2886 struct lustre_handle
*lockh
, __u64 flags
,
2887 enum ldlm_mode mode
)
2889 ldlm_policy_data_t policy
= { .l_inodebits
= {bits
} };
2893 fid
= &ll_i2info(inode
)->lli_fid
;
2894 CDEBUG(D_INFO
, "trying to match res "DFID
"\n", PFID(fid
));
2896 rc
= md_lock_match(ll_i2mdexp(inode
), flags
| LDLM_FL_BLOCK_GRANTED
,
2897 fid
, LDLM_IBITS
, &policy
, mode
, lockh
);
/*
 * NOTE(review): fragmented extraction — dropped lines include the opening
 * brace, the nlink update in the -ENOENT branch, and the returns.  Code
 * preserved byte-for-byte; only comments added.
 *
 * ll_inode_revalidate_fini() — post-process a revalidation rc: -ENOENT on a
 * non-regular/non-directory inode is tolerated (object already unlinked);
 * other failures are logged (quietly for -EACCES/-EIDRM, loudly otherwise).
 */
2902 static int ll_inode_revalidate_fini(struct inode
*inode
, int rc
)
2904 /* Already unlinked. Just update nlink and return success */
2905 if (rc
== -ENOENT
) {
2907 /* This path cannot be hit for regular files unless in
2908 * case of obscure races, so no need to validate size.
2910 if (!S_ISREG(inode
->i_mode
) && !S_ISDIR(inode
->i_mode
))
2912 } else if (rc
!= 0) {
2913 CDEBUG_LIMIT((rc
== -EACCES
|| rc
== -EIDRM
) ? D_INFO
: D_ERROR
,
2914 "%s: revalidate FID "DFID
" error: rc = %d\n",
2915 ll_get_fsname(inode
->i_sb
, NULL
, 0),
2916 PFID(ll_inode2fid(inode
)), rc
);
/*
 * NOTE(review): fragmented extraction — dropped lines include locals (rc,
 * ealen), several error-path gotos/returns and braces.  Code preserved
 * byte-for-byte; only comments added.
 *
 * __ll_inode_revalidate() — refresh @dentry's inode attributes from the MDS.
 * With OBD_CONNECT_ATTRFID the server supports getattr-by-FID, so an intent
 * lock (IT_GETATTR, or IT_LOOKUP when only the LOOKUP bit is wanted) is
 * enqueued; otherwise, if no cached inodebits lock covers @ibits, a plain
 * md_getattr() is issued and the reply applied via ll_prep_inode().
 */
2922 static int __ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
2924 struct inode
*inode
= d_inode(dentry
)
;
2925 struct ptlrpc_request
*req
= NULL
;
2926 struct obd_export
*exp
;
2929 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p),name=%pd\n",
2930 PFID(ll_inode2fid(inode
)), inode
, dentry
);
2932 exp
= ll_i2mdexp(inode
);
2934 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2935 * But under CMD case, it caused some lock issues, should be fixed
2936 * with new CMD ibits lock. See bug 12718
2938 if (exp_connect_flags(exp
) & OBD_CONNECT_ATTRFID
) {
2939 struct lookup_intent oit
= { .it_op
= IT_GETATTR
};
2940 struct md_op_data
*op_data
;
2942 if (ibits
== MDS_INODELOCK_LOOKUP
)
2943 oit
.it_op
= IT_LOOKUP
;
2945 /* Call getattr by fid, so do not provide name at all. */
2946 op_data
= ll_prep_md_op_data(NULL
, inode
,
2948 LUSTRE_OPC_ANY
, NULL
);
2949 if (IS_ERR(op_data
))
2950 return PTR_ERR(op_data
);
2952 oit
.it_create_mode
|= M_CHECK_STALE
;
2953 rc
= md_intent_lock(exp
, op_data
, NULL
, 0,
2954 /* we are not interested in name
2958 ll_md_blocking_ast
, 0);
2959 ll_finish_md_op_data(op_data
);
2960 oit
.it_create_mode
&= ~M_CHECK_STALE
;
2962 rc
= ll_inode_revalidate_fini(inode
, rc
);
2966 rc
= ll_revalidate_it_finish(req
, &oit
, inode
);
2968 ll_intent_release(&oit
);
2972 /* Unlinked? Unhash dentry, so it is not picked up later by
2973 * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2974 * here to preserve get_cwd functionality on 2.6.
2977 if (!d_inode(dentry
)->i_nlink
) {
2978 spin_lock(&inode
->i_lock
);
2979 d_lustre_invalidate(dentry
, 0);
2980 spin_unlock(&inode
->i_lock
);
2983 ll_lookup_finish_locks(&oit
, inode
);
/* no ATTRFID: fall back to an explicit getattr when no lock covers ibits */
2984 } else if (!ll_have_md_lock(d_inode(dentry
), &ibits
, LCK_MINMODE
)) {
2985 struct ll_sb_info
*sbi
= ll_i2sbi(d_inode(dentry
));
2986 u64 valid
= OBD_MD_FLGETATTR
;
2987 struct md_op_data
*op_data
;
/* regular files: also request striping EA, sized via default mdsize */
2990 if (S_ISREG(inode
->i_mode
)) {
2991 rc
= ll_get_default_mdsize(sbi
, &ealen
);
2994 valid
|= OBD_MD_FLEASIZE
| OBD_MD_FLMODEASIZE
;
2997 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
,
2998 0, ealen
, LUSTRE_OPC_ANY
,
3000 if (IS_ERR(op_data
))
3001 return PTR_ERR(op_data
);
3003 op_data
->op_valid
= valid
;
3004 rc
= md_getattr(sbi
->ll_md_exp
, op_data
, &req
);
3005 ll_finish_md_op_data(op_data
);
3007 rc
= ll_inode_revalidate_fini(inode
, rc
);
3011 rc
= ll_prep_inode(&inode
, req
, NULL
, NULL
);
3014 ptlrpc_req_finished(req
);
/*
 * NOTE(review): fragmented extraction — dropped lines include the rc local,
 * the error return after __ll_inode_revalidate, and the final return.
 * Code preserved byte-for-byte; only comments added.
 *
 * ll_inode_revalidate() — revalidate with the MDS, then for non-regular
 * files copy cached a/m/ctime from ll_inode_info; for regular files a size
 * glimpse is issued unless an HSM restore is in progress (in which case the
 * MDT already supplied the size and a glimpse would block on the layout
 * lock).
 */
3018 static int ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
3020 struct inode
*inode
= d_inode(dentry
);
3023 rc
= __ll_inode_revalidate(dentry
, ibits
);
3027 /* if object isn't regular file, don't validate size */
3028 if (!S_ISREG(inode
->i_mode
)) {
3029 LTIME_S(inode
->i_atime
) = ll_i2info(inode
)->lli_atime
;
3030 LTIME_S(inode
->i_mtime
) = ll_i2info(inode
)->lli_mtime
;
3031 LTIME_S(inode
->i_ctime
) = ll_i2info(inode
)->lli_ctime
;
3033 /* In case of restore, the MDT has the right size and has
3034 * already send it back without granting the layout lock,
3035 * inode is up-to-date so glimpse is useless.
3036 * Also to glimpse we need the layout, in case of a running
3037 * restore the MDT holds the layout lock so the glimpse will
3038 * block up to the end of restore (getattr will block)
3040 if (!(ll_i2info(inode
)->lli_flags
& LLIF_FILE_RESTORING
))
3041 rc
= ll_glimpse_size(inode
);
/*
 * NOTE(review): fragmented extraction — dropped lines include the opening
 * brace, the error check after revalidation, the else of the 32-bit-ino
 * branch, and the final return.  Code preserved byte-for-byte; only
 * comments added.
 *
 * ll_getattr() — VFS ->getattr: revalidate UPDATE|LOOKUP inodebits with the
 * MDS, then copy the (now fresh) in-core inode attributes into @stat.  The
 * inode number comes from cl_fid_build_ino() when the client needs a 32-bit
 * API, otherwise from i_ino.
 */
3046 int ll_getattr(struct vfsmount
*mnt
, struct dentry
*de
, struct kstat
*stat
)
3048 struct inode
*inode
= d_inode(de
);
3049 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3050 struct ll_inode_info
*lli
= ll_i2info(inode
);
3053 res
= ll_inode_revalidate(de
, MDS_INODELOCK_UPDATE
|
3054 MDS_INODELOCK_LOOKUP
);
3055 ll_stats_ops_tally(sbi
, LPROC_LL_GETATTR
, 1);
3060 stat
->dev
= inode
->i_sb
->s_dev
;
3061 if (ll_need_32bit_api(sbi
))
3062 stat
->ino
= cl_fid_build_ino(&lli
->lli_fid
, 1);
3064 stat
->ino
= inode
->i_ino
;
3065 stat
->mode
= inode
->i_mode
;
3066 stat
->nlink
= inode
->i_nlink
;
3067 stat
->uid
= inode
->i_uid
;
3068 stat
->gid
= inode
->i_gid
;
3069 stat
->rdev
= inode
->i_rdev
;
3070 stat
->atime
= inode
->i_atime
;
3071 stat
->mtime
= inode
->i_mtime
;
3072 stat
->ctime
= inode
->i_ctime
;
3073 stat
->blksize
= 1 << inode
->i_blkbits
;
3075 stat
->size
= i_size_read(inode
);
3076 stat
->blocks
= inode
->i_blocks
;
/*
 * NOTE(review): fragmented extraction — dropped lines include locals
 * (rc, num_bytes), the allocation-failure check, the error-path bodies of
 * the copy_from_user/copy_to_user branches, the kvfree and return.  Code
 * preserved byte-for-byte; only comments added.
 *
 * ll_fiemap() — ->fiemap handler: allocate a ll_user_fiemap big enough for
 * extent_count extents, seed it from @fieinfo (note: only the FIRST extent
 * is copied in from userspace — sizeof(struct ll_fiemap_extent), not the
 * whole array), run ll_do_fiemap(), then copy flags/counters and the mapped
 * extents back out.
 */
3081 static int ll_fiemap(struct inode
*inode
, struct fiemap_extent_info
*fieinfo
,
3082 __u64 start
, __u64 len
)
3086 struct ll_user_fiemap
*fiemap
;
3087 unsigned int extent_count
= fieinfo
->fi_extents_max
;
3089 num_bytes
= sizeof(*fiemap
) + (extent_count
*
3090 sizeof(struct ll_fiemap_extent
));
3091 fiemap
= libcfs_kvzalloc(num_bytes
, GFP_NOFS
);
3096 fiemap
->fm_flags
= fieinfo
->fi_flags
;
3097 fiemap
->fm_extent_count
= fieinfo
->fi_extents_max
;
3098 fiemap
->fm_start
= start
;
3099 fiemap
->fm_length
= len
;
/* seed with the first user extent only (one ll_fiemap_extent) */
3100 if (extent_count
> 0 &&
3101 copy_from_user(&fiemap
->fm_extents
[0], fieinfo
->fi_extents_start
,
3102 sizeof(struct ll_fiemap_extent
)) != 0) {
3107 rc
= ll_do_fiemap(inode
, fiemap
, num_bytes
);
3109 fieinfo
->fi_flags
= fiemap
->fm_flags
;
3110 fieinfo
->fi_extents_mapped
= fiemap
->fm_mapped_extents
;
3111 if (extent_count
> 0 &&
3112 copy_to_user(fieinfo
->fi_extents_start
, &fiemap
->fm_extents
[0],
3113 fiemap
->fm_mapped_extents
*
3114 sizeof(struct ll_fiemap_extent
)) != 0) {
/*
 * NOTE(review): fragmented extraction — dropped lines include the opening
 * brace, the #endif matching the #ifdef below, and the return of @acl.
 * Code preserved byte-for-byte; only comments added.
 *
 * ll_get_acl() — ->get_acl: duplicate the POSIX ACL cached in
 * lli_posix_acl under lli_lock; the VFS releases the extra reference.
 */
3124 struct posix_acl
*ll_get_acl(struct inode
*inode
, int type
)
3126 struct ll_inode_info
*lli
= ll_i2info(inode
);
3127 struct posix_acl
*acl
= NULL
;
3129 spin_lock(&lli
->lli_lock
);
3130 /* VFS' acl_permission_check->check_acl will release the refcount */
3131 acl
= posix_acl_dup(lli
->lli_posix_acl
);
3132 #ifdef CONFIG_FS_POSIX_ACL
3133 forget_cached_acl(inode
, type
);
3135 spin_unlock(&lli
->lli_lock
);
/*
 * NOTE(review): fragmented extraction — dropped lines include the rc local,
 * the body of the MAY_NOT_BLOCK branch (presumably returns -ECHILD so RCU
 * walk retries in ref-walk mode — TODO confirm), the error check after the
 * root revalidation, and the final return.  Code preserved byte-for-byte;
 * only comments added.
 *
 * ll_inode_permission() — ->permission: revalidate the root inode first
 * (it is never validated by lookup), then defer to generic_permission().
 */
3140 int ll_inode_permission(struct inode
*inode
, int mask
)
3144 if (mask
& MAY_NOT_BLOCK
)
3147 /* as root inode are NOT getting validated in lookup operation,
3148 * need to do it before permission check.
3151 if (is_root_inode(inode
)) {
3152 rc
= __ll_inode_revalidate(inode
->i_sb
->s_root
,
3153 MDS_INODELOCK_LOOKUP
);
3158 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), inode mode %x mask %o\n",
3159 PFID(ll_inode2fid(inode
)), inode
, inode
->i_mode
, mask
);
3161 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_INODE_PERM
, 1);
3162 rc
= generic_permission(inode
, mask
);
/*
 * NOTE(review): fragmented extraction — the closing "};" of each table and
 * the ->fsync/other members that sit in the dropped line ranges are missing
 * from this view.  Code preserved byte-for-byte; only comments added.
 *
 * Three file_operations variants selected by mount option:
 *  - ll_file_operations:        default (no ->flock/->lock members here)
 *  - ll_file_operations_flock:  -o flock — ->flock/->lock -> ll_file_flock
 *  - ll_file_operations_noflock: -o noflock — lock ops return ENOSYS
 * plus the shared inode_operations table for regular files.
 */
3167 /* -o localflock - only provides locally consistent flock locks */
3168 struct file_operations ll_file_operations
= {
3169 .read_iter
= ll_file_read_iter
,
3170 .write_iter
= ll_file_write_iter
,
3171 .unlocked_ioctl
= ll_file_ioctl
,
3172 .open
= ll_file_open
,
3173 .release
= ll_file_release
,
3174 .mmap
= ll_file_mmap
,
3175 .llseek
= ll_file_seek
,
3176 .splice_read
= ll_file_splice_read
,
3181 struct file_operations ll_file_operations_flock
= {
3182 .read_iter
= ll_file_read_iter
,
3183 .write_iter
= ll_file_write_iter
,
3184 .unlocked_ioctl
= ll_file_ioctl
,
3185 .open
= ll_file_open
,
3186 .release
= ll_file_release
,
3187 .mmap
= ll_file_mmap
,
3188 .llseek
= ll_file_seek
,
3189 .splice_read
= ll_file_splice_read
,
3192 .flock
= ll_file_flock
,
3193 .lock
= ll_file_flock
3196 /* These are for -o noflock - to return ENOSYS on flock calls */
3197 struct file_operations ll_file_operations_noflock
= {
3198 .read_iter
= ll_file_read_iter
,
3199 .write_iter
= ll_file_write_iter
,
3200 .unlocked_ioctl
= ll_file_ioctl
,
3201 .open
= ll_file_open
,
3202 .release
= ll_file_release
,
3203 .mmap
= ll_file_mmap
,
3204 .llseek
= ll_file_seek
,
3205 .splice_read
= ll_file_splice_read
,
3208 .flock
= ll_file_noflock
,
3209 .lock
= ll_file_noflock
3212 const struct inode_operations ll_file_inode_operations
= {
3213 .setattr
= ll_setattr
,
3214 .getattr
= ll_getattr
,
3215 .permission
= ll_inode_permission
,
3216 .setxattr
= ll_setxattr
,
3217 .getxattr
= ll_getxattr
,
3218 .listxattr
= ll_listxattr
,
3219 .removexattr
= ll_removexattr
,
3220 .fiemap
= ll_fiemap
,
3221 .get_acl
= ll_get_acl
,
/*
 * NOTE(review): fragmented extraction — the variable name of the singleton
 * (referenced as "llioc" by the initializers and the register/call
 * functions), its closing brace, and the "struct llioc_data {" line were
 * dropped.  Code preserved byte-for-byte; only comments added.
 *
 * Registry for dynamically registered ioctl handlers: a rw_semaphore
 * protecting a list of llioc_data entries, each carrying a callback and a
 * flexible array of the cmd numbers it services (iocd_cmd[0] — old-style
 * flexible array member).
 */
3224 /* dynamic ioctl number support routines */
3225 static struct llioc_ctl_data
{
3226 struct rw_semaphore ioc_sem
;
3227 struct list_head ioc_head
;
3229 __RWSEM_INITIALIZER(llioc
.ioc_sem
),
3230 LIST_HEAD_INIT(llioc
.ioc_head
)
3234 struct list_head iocd_list
;
3235 unsigned int iocd_size
;
3236 llioc_callback_t iocd_cb
;
3237 unsigned int iocd_count
;
3238 unsigned int iocd_cmd
[0];
/*
 * NOTE(review): fragmented extraction — dropped lines include the size
 * local, the NULL returns for invalid args / failed kzalloc, and the final
 * return of the opaque handle (in_data).  Code preserved byte-for-byte;
 * only comments added.
 *
 * ll_iocontrol_register() — register callback @cb for @count ioctl numbers
 * in @cmd; allocates an llioc_data (note the redundant memset after
 * kzalloc) and appends it to llioc.ioc_head under ioc_sem.  The returned
 * pointer doubles as the magic for ll_iocontrol_unregister().
 */
3241 void *ll_iocontrol_register(llioc_callback_t cb
, int count
, unsigned int *cmd
)
3244 struct llioc_data
*in_data
= NULL
;
3246 if (!cb
|| !cmd
|| count
> LLIOC_MAX_CMD
|| count
< 0)
3249 size
= sizeof(*in_data
) + count
* sizeof(unsigned int);
3250 in_data
= kzalloc(size
, GFP_NOFS
);
3254 memset(in_data
, 0, sizeof(*in_data
));
3255 in_data
->iocd_size
= size
;
3256 in_data
->iocd_cb
= cb
;
3257 in_data
->iocd_count
= count
;
3258 memcpy(in_data
->iocd_cmd
, cmd
, sizeof(unsigned int) * count
);
3260 down_write(&llioc
.ioc_sem
);
3261 list_add_tail(&in_data
->iocd_list
, &llioc
.ioc_head
);
3262 up_write(&llioc
.ioc_sem
);
3266 EXPORT_SYMBOL(ll_iocontrol_register
);
/*
 * NOTE(review): fragmented extraction — dropped lines include the NULL
 * magic guard, the "tmp == magic" comparison inside the loop, the kfree of
 * the matched entry, and returns.  Code preserved byte-for-byte; only
 * comments added.
 *
 * ll_iocontrol_unregister() — remove the registration identified by @magic
 * (the pointer returned by ll_iocontrol_register) from llioc.ioc_head
 * under ioc_sem; warns if no matching entry is found.
 */
3268 void ll_iocontrol_unregister(void *magic
)
3270 struct llioc_data
*tmp
;
3275 down_write(&llioc
.ioc_sem
);
3276 list_for_each_entry(tmp
, &llioc
.ioc_head
, iocd_list
) {
3278 list_del(&tmp
->iocd_list
);
3279 up_write(&llioc
.ioc_sem
);
3285 up_write(&llioc
.ioc_sem
);
3287 CWARN("didn't find iocontrol register block with magic: %p\n", magic
);
3289 EXPORT_SYMBOL(ll_iocontrol_unregister
);
/*
 * NOTE(review): fragmented extraction — dropped lines include the loop
 * "continue"/break bodies, the *rcp store, and the final return of @ret.
 * Code preserved byte-for-byte; only comments added.
 *
 * ll_iocontrol_call() — dispatch @cmd to the first registered llioc_data
 * whose iocd_cmd[] contains it; the callback may return LLIOC_STOP to end
 * iteration.  Runs under ioc_sem held for read.
 */
3291 static enum llioc_iter
3292 ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
3293 unsigned int cmd
, unsigned long arg
, int *rcp
)
3295 enum llioc_iter ret
= LLIOC_CONT
;
3296 struct llioc_data
*data
;
3297 int rc
= -EINVAL
, i
;
3299 down_read(&llioc
.ioc_sem
);
3300 list_for_each_entry(data
, &llioc
.ioc_head
, iocd_list
) {
3301 for (i
= 0; i
< data
->iocd_count
; i
++) {
3302 if (cmd
!= data
->iocd_cmd
[i
])
3305 ret
= data
->iocd_cb(inode
, file
, cmd
, arg
, data
, &rc
);
3309 if (ret
== LLIOC_STOP
)
3312 up_read(&llioc
.ioc_sem
);
/*
 * NOTE(review): fragmented extraction — dropped lines include locals
 * (env, result), the IS_ERR(env) guard condition, the result==0 guard
 * around the OBJECT_CONF_SET branch, and the final return.  Code preserved
 * byte-for-byte; only comments added.
 *
 * ll_layout_conf() — apply @conf to the inode's cl_object via
 * cl_conf_set(); for OBJECT_CONF_SET, once the layout is applied the
 * associated layout DLM lock is made matchable (ldlm_lock_allow_match).
 */
3319 int ll_layout_conf(struct inode
*inode
, const struct cl_object_conf
*conf
)
3321 struct ll_inode_info
*lli
= ll_i2info(inode
);
3322 struct cl_env_nest nest
;
3329 env
= cl_env_nested_get(&nest
);
3331 return PTR_ERR(env
);
3333 result
= cl_conf_set(env
, lli
->lli_clob
, conf
);
3334 cl_env_nested_put(&nest
, env
);
3336 if (conf
->coc_opc
== OBJECT_CONF_SET
) {
3337 struct ldlm_lock
*lock
= conf
->coc_lock
;
3340 LASSERT(ldlm_has_layout(lock
));
3342 /* it can only be allowed to match after layout is
3343 * applied to inode otherwise false layout would be
3344 * seen. Applying layout should happen before dropping
3347 ldlm_lock_allow_match(lock
);
/*
 * NOTE(review): fragmented extraction — dropped lines include locals (rc,
 * lmmsize, lmm, lvbdata), error checks after md_getxattr /
 * req_capsule_server_get / allocation, the goto labels, and the final
 * return.  Code preserved byte-for-byte; only comments added.
 *
 * ll_layout_fetch() — when the layout lock was granted via completion AST
 * (LVB not already attached), fetch the LOV EA from the MDT with a
 * getxattr RPC and install a copy as the lock's l_lvb_data/l_lvb_len under
 * the resource lock, freeing any previous buffer.
 */
3353 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3354 static int ll_layout_fetch(struct inode
*inode
, struct ldlm_lock
*lock
)
3357 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3358 struct ptlrpc_request
*req
;
3359 struct mdt_body
*body
;
3365 CDEBUG(D_INODE
, DFID
" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3366 PFID(ll_inode2fid(inode
)), ldlm_is_lvb_ready(lock
),
3367 lock
->l_lvb_data
, lock
->l_lvb_len
);
/* nothing to do if the LVB already carries a ready layout */
3369 if (lock
->l_lvb_data
&& ldlm_is_lvb_ready(lock
))
3372 /* if layout lock was granted right away, the layout is returned
3373 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3374 * blocked and then granted via completion ast, we have to fetch
3375 * layout here. Please note that we can't use the LVB buffer in
3376 * completion AST because it doesn't have a large enough buffer
3378 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
3380 rc
= md_getxattr(sbi
->ll_md_exp
, ll_inode2fid(inode
),
3381 OBD_MD_FLXATTR
, XATTR_NAME_LOV
, NULL
, 0,
3386 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
3392 lmmsize
= body
->eadatasize
;
3393 if (lmmsize
== 0) /* empty layout */ {
3398 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EADATA
, lmmsize
);
3404 lvbdata
= libcfs_kvzalloc(lmmsize
, GFP_NOFS
);
3410 memcpy(lvbdata
, lmm
, lmmsize
);
/* swap the new LVB buffer in under the resource lock */
3411 lock_res_and_lock(lock
);
3412 if (lock
->l_lvb_data
)
3413 kvfree(lock
->l_lvb_data
);
3415 lock
->l_lvb_data
= lvbdata
;
3416 lock
->l_lvb_len
= lmmsize
;
3417 unlock_res_and_lock(lock
);
3420 ptlrpc_req_finished(req
);
/*
 * NOTE(review): fragmented extraction — dropped lines include locals (rc,
 * lvb_ready), several goto labels / early exits (e.g. the "goto out" paths
 * around the lvb_ready fast path and unpackmd failure), the condition
 * guarding the wait_layout block, and the final return.  Code preserved
 * byte-for-byte; only comments added.
 *
 * ll_layout_lock_set() — with a referenced layout lock handle @lockh/@mode:
 * re-bind the lock to @inode, fast-path out if the LVB is already ready (or
 * no reconfiguration requested) returning the current generation in *gen;
 * otherwise fetch the layout (ll_layout_fetch), unpack it (obd_unpackmd)
 * into a lov_stripe_md, apply it via ll_layout_conf(OBJECT_CONF_SET), then
 * drop the lock reference.  -EBUSY from the conf step means in-flight IO
 * still uses the old layout, so an OBJECT_CONF_WAIT pass is issued.
 */
3425 * Apply the layout to the inode. Layout lock is held and will be released
3428 static int ll_layout_lock_set(struct lustre_handle
*lockh
, enum ldlm_mode mode
,
3429 struct inode
*inode
, __u32
*gen
, bool reconf
)
3431 struct ll_inode_info
*lli
= ll_i2info(inode
);
3432 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3433 struct ldlm_lock
*lock
;
3434 struct lustre_md md
= { NULL
};
3435 struct cl_object_conf conf
;
3438 bool wait_layout
= false;
3440 LASSERT(lustre_handle_is_used(lockh
));
3442 lock
= ldlm_handle2lock(lockh
);
3444 LASSERT(ldlm_has_layout(lock
));
3446 LDLM_DEBUG(lock
, "File "DFID
"(%p) being reconfigured: %d",
3447 PFID(&lli
->lli_fid
), inode
, reconf
);
3449 /* in case this is a caching lock and reinstate with new inode */
3450 md_set_lock_data(sbi
->ll_md_exp
, &lockh
->cookie
, inode
, NULL
);
3452 lock_res_and_lock(lock
);
3453 lvb_ready
= ldlm_is_lvb_ready(lock
);
3454 unlock_res_and_lock(lock
);
3455 /* checking lvb_ready is racy but this is okay. The worst case is
3456 * that multi processes may configure the file on the same time.
3458 if (lvb_ready
|| !reconf
) {
3461 /* layout_gen must be valid if layout lock is not
3462 * cancelled and stripe has already set
3464 *gen
= ll_layout_version_get(lli
);
3470 rc
= ll_layout_fetch(inode
, lock
);
3474 /* for layout lock, lmm is returned in lock's lvb.
3475 * lvb_data is immutable if the lock is held so it's safe to access it
3476 * without res lock. See the description in ldlm_lock_decref_internal()
3477 * for the condition to free lvb_data of layout lock
3479 if (lock
->l_lvb_data
) {
3480 rc
= obd_unpackmd(sbi
->ll_dt_exp
, &md
.lsm
,
3481 lock
->l_lvb_data
, lock
->l_lvb_len
);
3483 *gen
= LL_LAYOUT_GEN_EMPTY
;
3485 *gen
= md
.lsm
->lsm_layout_gen
;
3488 CERROR("%s: file " DFID
" unpackmd error: %d\n",
3489 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3490 PFID(&lli
->lli_fid
), rc
);
3496 /* set layout to file. Unlikely this will fail as old layout was
3499 memset(&conf
, 0, sizeof(conf
));
3500 conf
.coc_opc
= OBJECT_CONF_SET
;
3501 conf
.coc_inode
= inode
;
3502 conf
.coc_lock
= lock
;
3503 conf
.u
.coc_md
= &md
;
3504 rc
= ll_layout_conf(inode
, &conf
);
3507 obd_free_memmd(sbi
->ll_dt_exp
, &md
.lsm
);
3509 /* refresh layout failed, need to wait */
3510 wait_layout
= rc
== -EBUSY
;
3513 LDLM_LOCK_PUT(lock
);
3514 ldlm_lock_decref(lockh
, mode
);
3516 /* wait for IO to complete if it's still being used. */
3518 CDEBUG(D_INODE
, "%s: "DFID
"(%p) wait for layout reconf\n",
3519 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3520 PFID(&lli
->lli_fid
), inode
);
3522 memset(&conf
, 0, sizeof(conf
));
3523 conf
.coc_opc
= OBJECT_CONF_WAIT
;
3524 conf
.coc_inode
= inode
;
3525 rc
= ll_layout_conf(inode
, &conf
);
3529 CDEBUG(D_INODE
, "%s: file="DFID
" waiting layout return: %d.\n",
3530 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3531 PFID(&lli
->lli_fid
), rc
);
/*
 * NOTE(review): fragmented extraction — dropped lines include locals (rc),
 * the einfo .ei_mode member, goto labels (the "again" retry loop implied by
 * the "requeue" debug message), error checks after md_enqueue, and the
 * final return.  Code preserved byte-for-byte; only comments added.
 *
 * ll_layout_refresh() — return the current layout generation in *gen.
 * Fast path: return the cached generation, or match a cached
 * MDS_INODELOCK_LAYOUT lock (ll_take_md_lock) and apply it via
 * ll_layout_lock_set().  Slow path: under lli_layout_mutex, enqueue an
 * IT_LAYOUT intent lock (md_enqueue) and apply the granted lock the same
 * way.
 */
3537 * This function checks if there exists a LAYOUT lock on the client side,
3538 * or enqueues it if it doesn't have one in cache.
3540 * This function will not hold layout lock so it may be revoked any time after
3541 * this function returns. Any operations depend on layout should be redone
3544 * This function should be called before lov_io_init() to get an uptodate
3545 * layout version, the caller should save the version number and after IO
3546 * is finished, this function should be called again to verify that layout
3547 * is not changed during IO time.
3549 int ll_layout_refresh(struct inode
*inode
, __u32
*gen
)
3551 struct ll_inode_info
*lli
= ll_i2info(inode
);
3552 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3553 struct md_op_data
*op_data
;
3554 struct lookup_intent it
;
3555 struct lustre_handle lockh
;
3556 enum ldlm_mode mode
;
3557 struct ldlm_enqueue_info einfo
= {
3558 .ei_type
= LDLM_IBITS
,
3560 .ei_cb_bl
= ll_md_blocking_ast
,
3561 .ei_cb_cp
= ldlm_completion_ast
,
/* fast path: generation already known, or layout locks disabled */
3565 *gen
= ll_layout_version_get(lli
);
3566 if (!(sbi
->ll_flags
& LL_SBI_LAYOUT_LOCK
) || *gen
!= LL_LAYOUT_GEN_NONE
)
3570 LASSERT(fid_is_sane(ll_inode2fid(inode
)));
3571 LASSERT(S_ISREG(inode
->i_mode
));
3573 /* take layout lock mutex to enqueue layout lock exclusively. */
3574 mutex_lock(&lli
->lli_layout_mutex
);
3577 /* mostly layout lock is caching on the local side, so try to match
3578 * it before grabbing layout lock mutex.
3580 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3581 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3582 if (mode
!= 0) { /* hit cached lock */
3583 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3587 mutex_unlock(&lli
->lli_layout_mutex
);
3591 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
,
3592 0, 0, LUSTRE_OPC_ANY
, NULL
);
3593 if (IS_ERR(op_data
)) {
3594 mutex_unlock(&lli
->lli_layout_mutex
);
3595 return PTR_ERR(op_data
);
3598 /* have to enqueue one */
3599 memset(&it
, 0, sizeof(it
));
3600 it
.it_op
= IT_LAYOUT
;
3601 lockh
.cookie
= 0ULL;
3603 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID
"(%p)",
3604 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3605 PFID(&lli
->lli_fid
), inode
);
3607 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, &it
, op_data
, &lockh
,
3609 ptlrpc_req_finished(it
.it_request
);
3610 it
.it_request
= NULL
;
3612 ll_finish_md_op_data(op_data
);
3614 mode
= it
.it_lock_mode
;
3615 it
.it_lock_mode
= 0;
3616 ll_intent_drop_lock(&it
);
3619 /* set lock data in case this is a new lock */
3620 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
3621 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3625 mutex_unlock(&lli
->lli_layout_mutex
);
3631 * This function send a restore request to the MDT
3633 int ll_layout_restore(struct inode
*inode
, loff_t offset
, __u64 length
)
3635 struct hsm_user_request
*hur
;
3638 len
= sizeof(struct hsm_user_request
) +
3639 sizeof(struct hsm_user_item
);
3640 hur
= kzalloc(len
, GFP_NOFS
);
3644 hur
->hur_request
.hr_action
= HUA_RESTORE
;
3645 hur
->hur_request
.hr_archive_id
= 0;
3646 hur
->hur_request
.hr_flags
= 0;
3647 memcpy(&hur
->hur_user_item
[0].hui_fid
, &ll_i2info(inode
)->lli_fid
,
3648 sizeof(hur
->hur_user_item
[0].hui_fid
));
3649 hur
->hur_user_item
[0].hui_extent
.offset
= offset
;
3650 hur
->hur_user_item
[0].hui_extent
.length
= length
;
3651 hur
->hur_request
.hr_itemcount
= 1;
3652 rc
= obd_iocontrol(LL_IOC_HSM_REQUEST
, ll_i2sbi(inode
)->ll_md_exp
,