Merge remote-tracking branch 'staging/staging-next'
[deliverable/linux.git] / drivers / staging / lustre / lustre / llite / file.c
1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
19 *
20 * GPL HEADER END
21 */
22 /*
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Copyright (c) 2011, 2015, Intel Corporation.
27 */
28 /*
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
31 *
32 * lustre/llite/file.c
33 *
34 * Author: Peter Braam <braam@clusterfs.com>
35 * Author: Phil Schwan <phil@clusterfs.com>
36 * Author: Andreas Dilger <adilger@clusterfs.com>
37 */
38
39 #define DEBUG_SUBSYSTEM S_LLITE
40 #include "../include/lustre_dlm.h"
41 #include "../include/lustre_lite.h"
42 #include <linux/pagemap.h>
43 #include <linux/file.h>
44 #include <linux/sched.h>
45 #include <linux/mount.h>
46 #include "llite_internal.h"
47 #include "../include/lustre/ll_fiemap.h"
48 #include "../include/lustre/lustre_ioctl.h"
49
50 #include "../include/cl_object.h"
51
52 static int
53 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
54
55 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
56 bool *lease_broken);
57
58 static enum llioc_iter
59 ll_iocontrol_call(struct inode *inode, struct file *file,
60 unsigned int cmd, unsigned long arg, int *rcp);
61
62 static struct ll_file_data *ll_file_data_get(void)
63 {
64 struct ll_file_data *fd;
65
66 fd = kmem_cache_zalloc(ll_file_data_slab, GFP_NOFS);
67 if (!fd)
68 return NULL;
69 fd->fd_write_failed = false;
70 return fd;
71 }
72
73 static void ll_file_data_put(struct ll_file_data *fd)
74 {
75 if (fd)
76 kmem_cache_free(ll_file_data_slab, fd);
77 }
78
79 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
80 struct lustre_handle *fh)
81 {
82 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
83 op_data->op_attr.ia_mode = inode->i_mode;
84 op_data->op_attr.ia_atime = inode->i_atime;
85 op_data->op_attr.ia_mtime = inode->i_mtime;
86 op_data->op_attr.ia_ctime = inode->i_ctime;
87 op_data->op_attr.ia_size = i_size_read(inode);
88 op_data->op_attr_blocks = inode->i_blocks;
89 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
90 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
91 if (fh)
92 op_data->op_handle = *fh;
93
94 if (ll_i2info(inode)->lli_flags & LLIF_DATA_MODIFIED)
95 op_data->op_bias |= MDS_DATA_MODIFIED;
96 }
97
/**
 * Closes the IO epoch and packs all the attributes into @op_data for
 * the CLOSE rpc.
 *
 * Mode/time attributes are always sent.  Size/blocks are only packed
 * directly when the MDT does not do Size-on-MDS (SOM) for this file;
 * otherwise the epoch-close path is taken and SOM machinery decides
 * what to send.
 */
static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
			     struct obd_client_handle *och)
{
	op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
				    ATTR_MTIME | ATTR_MTIME_SET |
				    ATTR_CTIME | ATTR_CTIME_SET;

	/* Read-only opens carry no size/epoch state to flush. */
	if (!(och->och_flags & FMODE_WRITE))
		goto out;

	if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	else
		/* NOTE: &och is a pointer-to-pointer on purpose —
		 * ll_ioepoch_close() may consume/clear the handle.
		 */
		ll_ioepoch_close(inode, op_data, &och, 0);

out:
	/* Snapshot inode attributes and the open handle into op_data. */
	ll_pack_inode2opdata(inode, op_data, &och->och_fh);
	ll_prep_md_op_data(op_data, inode, NULL, NULL,
			   0, 0, LUSTRE_OPC_ANY, NULL);
}
122
/**
 * Send a CLOSE rpc for @och to the MDS and release the handle.
 *
 * If @data_version is non-NULL this is an HSM release close: the data
 * version is packed and MDS_HSM_RELEASE is set.  On -EAGAIN from
 * md_close() the MDS wants a Size-on-MDS update first, which is issued
 * via ll_som_update().
 *
 * Ownership: @och is freed here (with DEAD_HANDLE_MAGIC poisoning)
 * unless the inode still waits for DONE_WRITING under SOM, in which
 * case the close is queued and @och survives.
 *
 * Returns 0 on success or a negative errno; -EBUSY if an HSM release
 * was requested but the server did not mark the file released.
 */
static int ll_close_inode_openhandle(struct obd_export *md_exp,
				     struct inode *inode,
				     struct obd_client_handle *och,
				     const __u64 *data_version)
{
	struct obd_export *exp = ll_i2mdexp(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct obd_device *obd = class_exp2obd(exp);
	int epoch_close = 1;
	int rc;

	if (!obd) {
		/*
		 * XXX: in case of LMV, is this correct to access
		 * ->exp_handle?
		 */
		CERROR("Invalid MDC connection handle %#llx\n",
		       ll_i2mdexp(inode)->exp_handle.h_cookie);
		rc = 0;
		goto out;
	}

	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
	if (!op_data) {
		/* XXX We leak openhandle and request here. */
		rc = -ENOMEM;
		goto out;
	}

	ll_prepare_close(inode, op_data, och);
	if (data_version) {
		/* Pass in data_version implies release. */
		op_data->op_bias |= MDS_HSM_RELEASE;
		op_data->op_data_version = *data_version;
		op_data->op_lease_handle = och->och_lease_handle;
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	}
	/* Remember before md_close(): the out: path needs to know whether
	 * the epoch was closed by this request.
	 */
	epoch_close = op_data->op_flags & MF_EPOCH_CLOSE;
	rc = md_close(md_exp, op_data, och->och_mod, &req);
	if (rc == -EAGAIN) {
		/* This close must have the epoch closed. */
		LASSERT(epoch_close);
		/* MDS has instructed us to obtain Size-on-MDS attribute from
		 * OSTs and send setattr to back to MDS.
		 */
		rc = ll_som_update(inode, op_data);
		if (rc) {
			CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n",
			       ll_i2mdexp(inode)->exp_obd->obd_name,
			       PFID(ll_inode2fid(inode)), rc);
			/* SOM update failure is logged but not fatal. */
			rc = 0;
		}
	} else if (rc) {
		CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
		       ll_i2mdexp(inode)->exp_obd->obd_name,
		       PFID(ll_inode2fid(inode)), rc);
	}

	/* DATA_MODIFIED flag was successfully sent on close, cancel data
	 * modification flag.
	 */
	if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
		struct ll_inode_info *lli = ll_i2info(inode);

		spin_lock(&lli->lli_lock);
		lli->lli_flags &= ~LLIF_DATA_MODIFIED;
		spin_unlock(&lli->lli_lock);
	}

	if (rc == 0) {
		rc = ll_objects_destroy(req, inode);
		if (rc)
			CERROR("inode %lu ll_objects destroy: rc = %d\n",
			       inode->i_ino, rc);
	}
	if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
		struct mdt_body *body;

		/* The server confirms a release via OBD_MD_FLRELEASED;
		 * its absence means the file stayed busy.
		 */
		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		if (!(body->mbo_valid & OBD_MD_FLRELEASED))
			rc = -EBUSY;
	}

	ll_finish_md_op_data(op_data);

out:
	if (exp_connect_som(exp) && !epoch_close &&
	    S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
		/* Epoch not closed yet under SOM: defer; @och stays alive
		 * until DONE_WRITING completes.
		 */
		ll_queue_done_writing(inode, LLIF_DONE_WRITING);
	} else {
		md_clear_open_replay_data(md_exp, och);
		/* Free @och if it is not waiting for DONE_WRITING. */
		och->och_fh.cookie = DEAD_HANDLE_MAGIC;
		kfree(och);
	}
	if (req) /* This is close request */
		ptlrpc_req_finished(req);
	return rc;
}
223
/**
 * Close the MDS open handle for the given open mode if it has no more
 * users.
 *
 * Selects the write/exec/read handle slot from @fmode, and under
 * lli_och_mutex either bails out (handle still referenced) or detaches
 * the handle.  The actual close RPC is issued outside the mutex.
 *
 * Returns 0 or a negative errno from the close.
 */
int ll_md_real_close(struct inode *inode, fmode_t fmode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_client_handle **och_p;
	struct obd_client_handle *och;
	__u64 *och_usecount;
	int rc = 0;

	/* Pick the handle slot matching the open mode; FMODE_WRITE takes
	 * precedence over FMODE_EXEC, which takes precedence over READ.
	 */
	if (fmode & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (fmode & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		LASSERT(fmode & FMODE_READ);
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_usecount > 0) {
		/* There are still users of this handle, so skip
		 * freeing it.
		 */
		mutex_unlock(&lli->lli_och_mutex);
		return 0;
	}

	/* Detach the handle under the mutex; close it outside. */
	och = *och_p;
	*och_p = NULL;
	mutex_unlock(&lli->lli_och_mutex);

	if (och) {
		/* There might be a race and this handle may already
		 * be closed.
		 */
		rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
					       inode, och, NULL);
	}

	return rc;
}
267
/**
 * Per-file-descriptor close: tear down the fd-level state (group lock,
 * lease, private open handle), drop this fd's reference on the shared
 * MDS open handle, and close the handle if no matching OPEN lock
 * remains to keep it cached.
 *
 * Always consumes LUSTRE_FPRIVATE(file) and frees @fd, even on error,
 * since close errors are rarely retried by applications.
 */
static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
		       struct file *file)
{
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	int lockmode;
	__u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
	struct lustre_handle lockh;
	ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN} };
	int rc = 0;

	/* clear group lock, if present */
	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
		ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);

	if (fd->fd_lease_och) {
		bool lease_broken;

		/* Usually the lease is not released when the
		 * application crashed, we need to release here.
		 */
		rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
		CDEBUG(rc ? D_ERROR : D_INODE,
		       "Clean up lease " DFID " %d/%d\n",
		       PFID(&lli->lli_fid), rc, lease_broken);

		fd->fd_lease_och = NULL;
	}

	if (fd->fd_och) {
		/* This fd owns a private open handle (taken over for a
		 * lease); close it directly and skip the shared path.
		 */
		rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
		fd->fd_och = NULL;
		goto out;
	}

	/* Let's see if we have good enough OPEN lock on the file and if
	 * we can skip talking to MDS
	 */

	mutex_lock(&lli->lli_och_mutex);
	if (fd->fd_omode & FMODE_WRITE) {
		lockmode = LCK_CW;
		LASSERT(lli->lli_open_fd_write_count);
		lli->lli_open_fd_write_count--;
	} else if (fd->fd_omode & FMODE_EXEC) {
		lockmode = LCK_PR;
		LASSERT(lli->lli_open_fd_exec_count);
		lli->lli_open_fd_exec_count--;
	} else {
		lockmode = LCK_CR;
		LASSERT(lli->lli_open_fd_read_count);
		lli->lli_open_fd_read_count--;
	}
	mutex_unlock(&lli->lli_och_mutex);

	/* LDLM_FL_TEST_LOCK: only probe for a matching cached OPEN lock;
	 * with no such lock the MDS handle must be really closed now.
	 */
	if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
			   LDLM_IBITS, &policy, lockmode, &lockh))
		rc = ll_md_real_close(inode, fd->fd_omode);

out:
	LUSTRE_FPRIVATE(file) = NULL;
	ll_file_data_put(fd);

	return rc;
}
333
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here. Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 *
 * Also stops the statahead thread when this fd was the one that started
 * it, clears async write errors for regular files, and short-circuits
 * for the root inode (no MDS close needed there).
 */
int ll_file_release(struct inode *inode, struct file *file)
{
	struct ll_file_data *fd;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
	       PFID(ll_inode2fid(inode)), inode);

	if (!is_root_inode(inode))
		ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
	fd = LUSTRE_FPRIVATE(file);
	LASSERT(fd);

	/* The last ref on @file, maybe not be the owner pid of statahead.
	 * Different processes can open the same dir, "ll_opendir_key" means:
	 * it is me that should stop the statahead thread.
	 */
	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
	    lli->lli_opendir_pid != 0)
		ll_stop_statahead(inode, lli->lli_opendir_key);

	if (is_root_inode(inode)) {
		/* Root never went through the MDS open path; just drop fd. */
		LUSTRE_FPRIVATE(file) = NULL;
		ll_file_data_put(fd);
		return 0;
	}

	if (!S_ISDIR(inode->i_mode)) {
		/* Harvest any async write error recorded against the
		 * object so it is reported from this close.
		 */
		if (lli->lli_clob)
			lov_read_and_clear_async_rc(lli->lli_clob);
		lli->lli_async_rc = 0;
	}

	rc = ll_md_close(sbi->ll_md_exp, inode, file);

	if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
		libcfs_debug_dumplog();

	return rc;
}
381
/**
 * Re-issue an open intent to the MDS for @de (open-by-fid path).
 *
 * The file name is only packed when the server lacks
 * OBD_CONNECT_OPEN_BY_FID and the dentry name is valid.  On success
 * the inode is refreshed from the reply and lock data recorded.
 *
 * @lmm/@lmmsize optionally carry striping data for the request.
 * Returns 0 or a negative errno; -ESTALE from the intent gets a quiet
 * exit path to avoid log flooding.
 */
static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
			       struct lookup_intent *itp)
{
	struct inode *inode = d_inode(de);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct dentry *parent = de->d_parent;
	const char *name = NULL;
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	int len = 0, rc;

	LASSERT(parent);
	LASSERT(itp->it_flags & MDS_OPEN_BY_FID);

	/*
	 * if server supports open-by-fid, or file name is invalid, don't pack
	 * name in open request
	 */
	if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
	    lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
		name = de->d_name.name;
		len = de->d_name.len;
	}

	op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
				     O_RDWR, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);
	op_data->op_data = lmm;
	op_data->op_data_size = lmmsize;

	rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
			    &ll_md_blocking_ast, 0);
	ll_finish_md_op_data(op_data);
	if (rc == -ESTALE) {
		/* reason for keep own exit path - don`t flood log
		 * with messages with -ESTALE errors.
		 */
		if (!it_disposition(itp, DISP_OPEN_OPEN) ||
		    it_open_error(DISP_OPEN_OPEN, itp))
			goto out;
		/* The open succeeded server-side; release the handle so
		 * nothing is leaked on this error path.
		 */
		ll_release_openhandle(inode, itp);
		goto out;
	}

	if (it_disposition(itp, DISP_LOOKUP_NEG)) {
		rc = -ENOENT;
		goto out;
	}

	if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
		rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
		CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
		goto out;
	}

	rc = ll_prep_inode(&inode, req, NULL, itp);
	if (!rc && itp->it_lock_mode)
		ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);

out:
	ptlrpc_req_finished(req);
	ll_intent_drop_lock(itp);

	return rc;
}
448
449 /**
450 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
451 * not believe attributes if a few ioepoch holders exist. Attributes for
452 * previous ioepoch if new one is opened are also skipped by MDS.
453 */
454 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
455 {
456 if (ioepoch && lli->lli_ioepoch != ioepoch) {
457 lli->lli_ioepoch = ioepoch;
458 CDEBUG(D_INODE, "Epoch %llu opened on "DFID"\n",
459 ioepoch, PFID(&lli->lli_fid));
460 }
461 }
462
/**
 * Fill @och from the MDT reply carried by the intent @it, then register
 * the open for replay.
 *
 * Copies the open handle and fid from the reply body, records the lock
 * handle as a (potential) lease handle, and stamps the magic.
 * Returns the result of md_set_open_replay_data().
 */
static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
		       struct obd_client_handle *och)
{
	struct mdt_body *body;

	body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
	och->och_fh = body->mbo_handle;
	och->och_fid = body->mbo_fid1;
	och->och_lease_handle.cookie = it->it_lock_handle;
	och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
	och->och_flags = it->it_flags;

	return md_set_open_replay_data(md_exp, och, it);
}
477
/**
 * Finish the client-local part of an open: attach @fd to @file and
 * initialize its readahead state, open mode and cl_io context.
 *
 * When @och is non-NULL the new MDS open handle is filled from the
 * intent reply and the returned ioepoch is applied to the inode.
 * Returns 0 on success or the error from ll_och_fill().
 */
static int ll_local_open(struct file *file, struct lookup_intent *it,
			 struct ll_file_data *fd, struct obd_client_handle *och)
{
	struct inode *inode = file_inode(file);
	struct ll_inode_info *lli = ll_i2info(inode);

	/* file->private_data must have been cleared by the caller. */
	LASSERT(!LUSTRE_FPRIVATE(file));

	LASSERT(fd);

	if (och) {
		struct mdt_body *body;
		int rc;

		rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
		if (rc != 0)
			return rc;

		body = req_capsule_server_get(&it->it_request->rq_pill,
					      &RMF_MDT_BODY);
		ll_ioepoch_open(lli, body->mbo_ioepoch);
	}

	LUSTRE_FPRIVATE(file) = fd;
	ll_readahead_init(inode, &fd->fd_ras);
	fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);

	/* ll_cl_context initialize */
	rwlock_init(&fd->fd_lock);
	INIT_LIST_HEAD(&fd->fd_lccs);

	return 0;
}
511
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 *
 * Flow: either reuse a cached per-mode MDS open handle (och) under
 * lli_och_mutex, or — if no handle and no prepared intent — issue an
 * open intent to the MDS and retry via the "restart" label.
 */
int ll_file_open(struct inode *inode, struct file *file)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct lookup_intent *it, oit = { .it_op = IT_OPEN,
					  .it_flags = file->f_flags };
	struct obd_client_handle **och_p = NULL;
	__u64 *och_usecount = NULL;
	struct ll_file_data *fd;
	int rc = 0, opendir_set = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
	       PFID(ll_inode2fid(inode)), inode, file->f_flags);

	it = file->private_data; /* XXX: compat macro */
	file->private_data = NULL; /* prevent ll_local_open assertion */

	fd = ll_file_data_get();
	if (!fd) {
		rc = -ENOMEM;
		goto out_openerr;
	}

	fd->fd_file = file;
	if (S_ISDIR(inode->i_mode)) {
		/* First opener of a directory may become the statahead
		 * owner; lli_opendir_key identifies that fd later.
		 */
		spin_lock(&lli->lli_sa_lock);
		if (!lli->lli_opendir_key && !lli->lli_sai &&
		    lli->lli_opendir_pid == 0) {
			lli->lli_opendir_key = fd;
			lli->lli_opendir_pid = current_pid();
			opendir_set = 1;
		}
		spin_unlock(&lli->lli_sa_lock);
	}

	if (is_root_inode(inode)) {
		/* Root inode skips the MDS open machinery entirely. */
		LUSTRE_FPRIVATE(file) = fd;
		return 0;
	}

	if (!it || !it->it_disposition) {
		/* Convert f_flags into access mode. We cannot use file->f_mode,
		 * because everything but O_ACCMODE mask was stripped from
		 * there
		 */
		if ((oit.it_flags + 1) & O_ACCMODE)
			oit.it_flags++;
		if (file->f_flags & O_TRUNC)
			oit.it_flags |= FMODE_WRITE;

		/* kernel only call f_op->open in dentry_open. filp_open calls
		 * dentry_open after call to open_namei that checks permissions.
		 * Only nfsd_open call dentry_open directly without checking
		 * permissions and because of that this code below is safe.
		 */
		if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
			oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;

		/* We do not want O_EXCL here, presumably we opened the file
		 * already? XXX - NFS implications?
		 */
		oit.it_flags &= ~O_EXCL;

		/* bug20584, if "it_flags" contains O_CREAT, the file will be
		 * created if necessary, then "IT_CREAT" should be set to keep
		 * consistent with it
		 */
		if (oit.it_flags & O_CREAT)
			oit.it_op |= IT_CREAT;

		it = &oit;
	}

restart:
	/* Let's see if we have file open on MDS already. */
	if (it->it_flags & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (it->it_flags & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_p) { /* Open handle is present */
		if (it_disposition(it, DISP_OPEN_OPEN)) {
			/* Well, there's extra open request that we do not need,
			 * let's close it somehow. This will decref request.
			 */
			rc = it_open_error(DISP_OPEN_OPEN, it);
			if (rc) {
				mutex_unlock(&lli->lli_och_mutex);
				goto out_openerr;
			}

			ll_release_openhandle(inode, it);
		}
		(*och_usecount)++;

		rc = ll_local_open(file, it, fd, NULL);
		if (rc) {
			(*och_usecount)--;
			mutex_unlock(&lli->lli_och_mutex);
			goto out_openerr;
		}
	} else {
		LASSERT(*och_usecount == 0);
		if (!it->it_disposition) {
			/* We cannot just request lock handle now, new ELC code
			 * means that one of other OPEN locks for this file
			 * could be cancelled, and since blocking ast handler
			 * would attempt to grab och_mutex as well, that would
			 * result in a deadlock
			 */
			mutex_unlock(&lli->lli_och_mutex);
			/*
			 * Normally called under two situations:
			 * 1. NFS export.
			 * 2. revalidate with IT_OPEN (revalidate doesn't
			 *    execute this intent any more).
			 *
			 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
			 *
			 * Always specify MDS_OPEN_BY_FID because we don't want
			 * to get file with different fid.
			 */
			it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
			rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
			if (rc)
				goto out_openerr;

			/* Intent now prepared; retake the mutex and retry. */
			goto restart;
		}
		*och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS);
		if (!*och_p) {
			rc = -ENOMEM;
			goto out_och_free;
		}

		(*och_usecount)++;

		/* md_intent_lock() didn't get a request ref if there was an
		 * open error, so don't do cleanup on the request here
		 * (bug 3430)
		 */
		/* XXX (green): Should not we bail out on any error here, not
		 * just open error?
		 */
		rc = it_open_error(DISP_OPEN_OPEN, it);
		if (rc)
			goto out_och_free;

		LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
			 "inode %p: disposition %x, status %d\n", inode,
			 it_disposition(it, ~0), it->it_status);

		rc = ll_local_open(file, it, fd, *och_p);
		if (rc)
			goto out_och_free;
	}
	mutex_unlock(&lli->lli_och_mutex);
	/* fd ownership transferred to the file in ll_local_open(). */
	fd = NULL;

	/* Must do this outside lli_och_mutex lock to prevent deadlock where
	 * different kind of OPEN lock for this same inode gets cancelled
	 * by ldlm_cancel_lru
	 */
	if (!S_ISREG(inode->i_mode))
		goto out_och_free;

	if (!lli->lli_has_smd &&
	    (cl_is_lov_delay_create(file->f_flags) ||
	     (file->f_mode & FMODE_WRITE) == 0)) {
		CDEBUG(D_INODE, "object creation was delayed\n");
		goto out_och_free;
	}
	cl_lov_delay_create_clear(&file->f_flags);
	goto out_och_free;

out_och_free:
	/* Note: on success (rc == 0) this label is just the exit path;
	 * the och/mutex cleanup below only runs on error.
	 */
	if (rc) {
		if (och_p && *och_p) {
			kfree(*och_p);
			*och_p = NULL;
			(*och_usecount)--;
		}
		mutex_unlock(&lli->lli_och_mutex);

out_openerr:
		if (opendir_set != 0)
			ll_stop_statahead(inode, lli->lli_opendir_key);
		ll_file_data_put(fd);
	} else {
		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
	}

	if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->it_request);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}

	return rc;
}
730
731 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
732 struct ldlm_lock_desc *desc,
733 void *data, int flag)
734 {
735 int rc;
736 struct lustre_handle lockh;
737
738 switch (flag) {
739 case LDLM_CB_BLOCKING:
740 ldlm_lock2handle(lock, &lockh);
741 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
742 if (rc < 0) {
743 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
744 return rc;
745 }
746 break;
747 case LDLM_CB_CANCELING:
748 /* do nothing */
749 break;
750 }
751 return 0;
752 }
753
/**
 * Acquire a lease and open the file.
 *
 * @fmode must be exactly FMODE_READ or FMODE_WRITE.  If @file is
 * given, the existing open handle is taken over into fd->fd_och (only
 * allowed when this fd is the sole opener) and its cookie is sent as
 * op_handle so the MDT can pair the lease with the same owner.
 *
 * Returns the new obd_client_handle on success, or ERR_PTR(-errno).
 * Note the "rc = -EBUSY then maybe rc = 0" pattern below: rc doubles
 * as the takeover-success flag while lli_och_mutex is held.
 */
static struct obd_client_handle *
ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
	      __u64 open_flags)
{
	struct lookup_intent it = { .it_op = IT_OPEN };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct lustre_handle old_handle = { 0 };
	struct obd_client_handle *och = NULL;
	int rc;
	int rc2;

	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
		return ERR_PTR(-EINVAL);

	if (file) {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
		struct obd_client_handle **och_p;
		__u64 *och_usecount;

		/* Requested mode must match the file's open mode. */
		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
			return ERR_PTR(-EPERM);

		/* Get the openhandle of the file */
		rc = -EBUSY;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och) {
			/* This fd already holds a lease. */
			mutex_unlock(&lli->lli_och_mutex);
			return ERR_PTR(rc);
		}

		if (!fd->fd_och) {
			if (file->f_mode & FMODE_WRITE) {
				LASSERT(lli->lli_mds_write_och);
				och_p = &lli->lli_mds_write_och;
				och_usecount = &lli->lli_open_fd_write_count;
			} else {
				LASSERT(lli->lli_mds_read_och);
				och_p = &lli->lli_mds_read_och;
				och_usecount = &lli->lli_open_fd_read_count;
			}
			/* Take over the shared handle only if we are the
			 * single opener in this mode.
			 */
			if (*och_usecount == 1) {
				fd->fd_och = *och_p;
				*och_p = NULL;
				*och_usecount = 0;
				rc = 0;
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (rc < 0) /* more than 1 opener */
			return ERR_PTR(rc);

		LASSERT(fd->fd_och);
		old_handle = fd->fd_och->och_fh;
	}

	och = kzalloc(sizeof(*och), GFP_NOFS);
	if (!och)
		return ERR_PTR(-ENOMEM);

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		rc = PTR_ERR(op_data);
		goto out;
	}

	/* To tell the MDT this openhandle is from the same owner */
	op_data->op_handle = old_handle;

	it.it_flags = fmode | open_flags;
	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
			    &ll_md_blocking_lease_ast,
	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
	 * it can be cancelled which may mislead applications that the lease is
	 * broken;
	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
	 * doesn't deal with openhandle, so normal openhandle will be leaked.
	 */
			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
	ll_finish_md_op_data(op_data);
	ptlrpc_req_finished(req);
	if (rc < 0)
		goto out_release_it;

	if (it_disposition(&it, DISP_LOOKUP_NEG)) {
		rc = -ENOENT;
		goto out_release_it;
	}

	rc = it_open_error(DISP_OPEN_OPEN, &it);
	if (rc)
		goto out_release_it;

	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
	ll_och_fill(sbi->ll_md_exp, &it, och);

	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ {
		rc = -EOPNOTSUPP;
		goto out_close;
	}

	/* already get lease, handle lease lock */
	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
	if (it.it_lock_mode == 0 ||
	    it.it_lock_bits != MDS_INODELOCK_OPEN) {
		/* open lock must return for lease */
		CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
		       PFID(ll_inode2fid(inode)), it.it_lock_mode,
		       it.it_lock_bits);
		rc = -EPROTO;
		goto out_close;
	}

	ll_intent_release(&it);
	return och;

out_close:
	/* Cancel open lock */
	if (it.it_lock_mode != 0) {
		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
					    it.it_lock_mode);
		it.it_lock_mode = 0;
		och->och_lease_handle.cookie = 0ULL;
	}
	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
	if (rc2 < 0)
		CERROR("%s: error closing file "DFID": %d\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       PFID(&ll_i2info(inode)->lli_fid), rc2);
	och = NULL; /* och has been freed in ll_close_inode_openhandle() */
out_release_it:
	ll_intent_release(&it);
out:
	kfree(och);
	return ERR_PTR(rc);
}
898
/**
 * Release lease and close the file.
 * It will check if the lease has ever broken.
 *
 * If the lease lock was not already cancelled (lease intact), it is
 * cancelled synchronously here.  @lease_broken, if non-NULL, reports
 * whether the lease had been broken before this call.  @och is
 * consumed by ll_close_inode_openhandle().
 */
static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
			  bool *lease_broken)
{
	struct ldlm_lock *lock;
	bool cancelled = true;
	int rc;

	lock = ldlm_handle2lock(&och->och_lease_handle);
	if (lock) {
		/* Read the cancel state under the resource lock. */
		lock_res_and_lock(lock);
		cancelled = ldlm_is_cancel(lock);
		unlock_res_and_lock(lock);
		LDLM_LOCK_PUT(lock);
	}

	CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
	       PFID(&ll_i2info(inode)->lli_fid), cancelled);

	if (!cancelled)
		ldlm_cli_cancel(&och->och_lease_handle, 0);
	if (lease_broken)
		*lease_broken = cancelled;

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
				       NULL);
	return rc;
}
930
/* Fills the obdo with the attributes for the lsm.
 *
 * Performs an async OST getattr (waited on synchronously) for size,
 * blocks and times.  @dv_flags may request server-side locking and a
 * write flush (LL_DV_WR_FLUSH / LL_DV_RD_FLUSH); if a write flush was
 * requested but the server did not confirm it via OBD_FL_FLUSH, the
 * call fails with -ENOTSUPP.
 */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
			  struct obdo *obdo, __u64 ioepoch, int dv_flags)
{
	struct ptlrpc_request_set *set;
	struct obd_info oinfo = { };
	int rc;

	LASSERT(lsm);

	oinfo.oi_md = lsm;
	oinfo.oi_oa = obdo;
	oinfo.oi_oa->o_oi = lsm->lsm_oi;
	oinfo.oi_oa->o_mode = S_IFREG;
	oinfo.oi_oa->o_ioepoch = ioepoch;
	oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
			       OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
			       OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
			       OBD_MD_FLMTIME | OBD_MD_FLCTIME |
			       OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
			       OBD_MD_FLDATAVERSION;
	if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
		/* Ask the OST to take the lock server-side. */
		oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
		oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
		if (dv_flags & LL_DV_WR_FLUSH)
			oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
	}

	set = ptlrpc_prep_set();
	if (!set) {
		CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);
		rc = -ENOMEM;
	} else {
		rc = obd_getattr_async(exp, &oinfo, set);
		if (rc == 0)
			rc = ptlrpc_set_wait(set);
		ptlrpc_set_destroy(set);
	}
	if (rc == 0) {
		/* Keep only the attribute bits callers consume. */
		oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
					 OBD_MD_FLATIME | OBD_MD_FLMTIME |
					 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
					 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
		/* Server must echo OBD_FL_FLUSH or the flush was ignored. */
		if (dv_flags & LL_DV_WR_FLUSH &&
		    !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
		      oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
			return -ENOTSUPP;
	}
	return rc;
}
981
/**
 * Performs the getattr on the inode and updates its fields.
 * If @sync != 0, perform the getattr under the server-side lock.
 *
 * Takes a reference on the inode's lsm for the duration of the call
 * and refreshes the inode from the returned obdo on success.
 */
int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
		     __u64 ioepoch, int sync)
{
	struct lov_stripe_md *lsm;
	int rc;

	lsm = ccc_inode_lsm_get(inode);
	rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
			    obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
	if (rc == 0) {
		/* With no lsm, report the object id from the obdo itself. */
		struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;

		obdo_refresh_inode(inode, obdo, obdo->o_valid);
		CDEBUG(D_INODE, "objid " DOSTID " size %llu, blocks %llu, blksize %lu\n",
		       POSTID(oi), i_size_read(inode),
		       (unsigned long long)inode->i_blocks,
		       1UL << inode->i_blkbits);
	}
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
1007
/**
 * Merge MDS-provided and OST-provided attributes into the inode under
 * the inode size lock.
 *
 * Times start from the MDS copies cached in lli, then each is raised
 * to the OST value if that is newer; size and blocks come from the
 * cl_object attributes.  Returns 0 or the cl_object_attr_get() error
 * (in which case only the MDS times have been applied).
 */
int ll_merge_attr(const struct lu_env *env, struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_object *obj = lli->lli_clob;
	struct cl_attr *attr = vvp_env_thread_attr(env);
	s64 atime;
	s64 mtime;
	s64 ctime;
	int rc = 0;

	ll_inode_size_lock(inode);

	/* merge timestamps the most recently obtained from mds with
	 * timestamps obtained from osts
	 */
	LTIME_S(inode->i_atime) = lli->lli_atime;
	LTIME_S(inode->i_mtime) = lli->lli_mtime;
	LTIME_S(inode->i_ctime) = lli->lli_ctime;

	mtime = LTIME_S(inode->i_mtime);
	atime = LTIME_S(inode->i_atime);
	ctime = LTIME_S(inode->i_ctime);

	cl_object_attr_lock(obj);
	rc = cl_object_attr_get(env, obj, attr);
	cl_object_attr_unlock(obj);

	if (rc != 0)
		goto out_size_unlock;

	/* Take the newer of MDS vs OST for each timestamp. */
	if (atime < attr->cat_atime)
		atime = attr->cat_atime;

	if (ctime < attr->cat_ctime)
		ctime = attr->cat_ctime;

	if (mtime < attr->cat_mtime)
		mtime = attr->cat_mtime;

	CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
	       PFID(&lli->lli_fid), attr->cat_size);

	i_size_write(inode, attr->cat_size);

	inode->i_blocks = attr->cat_blocks;

	LTIME_S(inode->i_mtime) = mtime;
	LTIME_S(inode->i_atime) = atime;
	LTIME_S(inode->i_ctime) = ctime;

out_size_unlock:
	ll_inode_size_unlock(inode);

	return rc;
}
1063
1064 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1065 lstat_t *st)
1066 {
1067 struct obdo obdo = { 0 };
1068 int rc;
1069
1070 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, &obdo, 0, 0);
1071 if (rc == 0) {
1072 st->st_size = obdo.o_size;
1073 st->st_blocks = obdo.o_blocks;
1074 st->st_mtime = obdo.o_mtime;
1075 st->st_atime = obdo.o_atime;
1076 st->st_ctime = obdo.o_ctime;
1077 }
1078 return rc;
1079 }
1080
1081 static bool file_is_noatime(const struct file *file)
1082 {
1083 const struct vfsmount *mnt = file->f_path.mnt;
1084 const struct inode *inode = file_inode(file);
1085
1086 /* Adapted from file_accessed() and touch_atime().*/
1087 if (file->f_flags & O_NOATIME)
1088 return true;
1089
1090 if (inode->i_flags & S_NOATIME)
1091 return true;
1092
1093 if (IS_NOATIME(inode))
1094 return true;
1095
1096 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1097 return true;
1098
1099 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1100 return true;
1101
1102 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1103 return true;
1104
1105 return false;
1106 }
1107
1108 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1109 {
1110 struct inode *inode = file_inode(file);
1111
1112 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1113 if (write) {
1114 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1115 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1116 file->f_flags & O_DIRECT ||
1117 IS_SYNC(inode);
1118 }
1119 io->ci_obj = ll_i2info(inode)->lli_clob;
1120 io->ci_lockreq = CILR_MAYBE;
1121 if (ll_file_nolock(file)) {
1122 io->ci_lockreq = CILR_NEVER;
1123 io->ci_no_srvlock = 1;
1124 } else if (file->f_flags & O_APPEND) {
1125 io->ci_lockreq = CILR_MANDATORY;
1126 }
1127
1128 io->ci_noatime = file_is_noatime(file);
1129 }
1130
1131 static ssize_t
1132 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1133 struct file *file, enum cl_io_type iot,
1134 loff_t *ppos, size_t count)
1135 {
1136 struct ll_inode_info *lli = ll_i2info(file_inode(file));
1137 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1138 struct cl_io *io;
1139 ssize_t result;
1140
1141 CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zd\n",
1142 file, iot, *ppos, count);
1143
1144 restart:
1145 io = vvp_env_thread_io(env);
1146 ll_io_init(io, file, iot == CIT_WRITE);
1147
1148 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1149 struct vvp_io *vio = vvp_env_io(env);
1150 int write_mutex_locked = 0;
1151
1152 vio->vui_fd = LUSTRE_FPRIVATE(file);
1153 vio->vui_io_subtype = args->via_io_subtype;
1154
1155 switch (vio->vui_io_subtype) {
1156 case IO_NORMAL:
1157 vio->vui_iter = args->u.normal.via_iter;
1158 vio->vui_iocb = args->u.normal.via_iocb;
1159 if ((iot == CIT_WRITE) &&
1160 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1161 if (mutex_lock_interruptible(&lli->
1162 lli_write_mutex)) {
1163 result = -ERESTARTSYS;
1164 goto out;
1165 }
1166 write_mutex_locked = 1;
1167 }
1168 down_read(&lli->lli_trunc_sem);
1169 break;
1170 case IO_SPLICE:
1171 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1172 vio->u.splice.vui_flags = args->u.splice.via_flags;
1173 break;
1174 default:
1175 CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
1176 LBUG();
1177 }
1178 ll_cl_add(file, env, io);
1179 result = cl_io_loop(env, io);
1180 ll_cl_remove(file, env);
1181 if (args->via_io_subtype == IO_NORMAL)
1182 up_read(&lli->lli_trunc_sem);
1183 if (write_mutex_locked)
1184 mutex_unlock(&lli->lli_write_mutex);
1185 } else {
1186 /* cl_io_rw_init() handled IO */
1187 result = io->ci_result;
1188 }
1189
1190 if (io->ci_nob > 0) {
1191 result = io->ci_nob;
1192 *ppos = io->u.ci_wr.wr.crw_pos;
1193 }
1194 goto out;
1195 out:
1196 cl_io_fini(env, io);
1197 /* If any bit been read/written (result != 0), we just return
1198 * short read/write instead of restart io.
1199 */
1200 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1201 CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zd\n",
1202 iot == CIT_READ ? "read" : "write",
1203 file, *ppos, count);
1204 LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
1205 goto restart;
1206 }
1207
1208 if (iot == CIT_READ) {
1209 if (result >= 0)
1210 ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
1211 LPROC_LL_READ_BYTES, result);
1212 } else if (iot == CIT_WRITE) {
1213 if (result >= 0) {
1214 ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
1215 LPROC_LL_WRITE_BYTES, result);
1216 fd->fd_write_failed = false;
1217 } else if (result != -ERESTARTSYS) {
1218 fd->fd_write_failed = true;
1219 }
1220 }
1221 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1222
1223 return result;
1224 }
1225
1226 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1227 {
1228 struct lu_env *env;
1229 struct vvp_io_args *args;
1230 ssize_t result;
1231 int refcheck;
1232
1233 env = cl_env_get(&refcheck);
1234 if (IS_ERR(env))
1235 return PTR_ERR(env);
1236
1237 args = ll_env_args(env, IO_NORMAL);
1238 args->u.normal.via_iter = to;
1239 args->u.normal.via_iocb = iocb;
1240
1241 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1242 &iocb->ki_pos, iov_iter_count(to));
1243 cl_env_put(env, &refcheck);
1244 return result;
1245 }
1246
1247 /*
1248 * Write to a file (through the page cache).
1249 */
1250 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1251 {
1252 struct lu_env *env;
1253 struct vvp_io_args *args;
1254 ssize_t result;
1255 int refcheck;
1256
1257 env = cl_env_get(&refcheck);
1258 if (IS_ERR(env))
1259 return PTR_ERR(env);
1260
1261 args = ll_env_args(env, IO_NORMAL);
1262 args->u.normal.via_iter = from;
1263 args->u.normal.via_iocb = iocb;
1264
1265 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1266 &iocb->ki_pos, iov_iter_count(from));
1267 cl_env_put(env, &refcheck);
1268 return result;
1269 }
1270
1271 /*
1272 * Send file content (through pagecache) somewhere with helper
1273 */
1274 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1275 struct pipe_inode_info *pipe, size_t count,
1276 unsigned int flags)
1277 {
1278 struct lu_env *env;
1279 struct vvp_io_args *args;
1280 ssize_t result;
1281 int refcheck;
1282
1283 env = cl_env_get(&refcheck);
1284 if (IS_ERR(env))
1285 return PTR_ERR(env);
1286
1287 args = ll_env_args(env, IO_SPLICE);
1288 args->u.splice.via_pipe = pipe;
1289 args->u.splice.via_flags = flags;
1290
1291 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1292 cl_env_put(env, &refcheck);
1293 return result;
1294 }
1295
1296 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx)
1297 {
1298 struct obd_export *exp = ll_i2dtexp(inode);
1299 struct obd_trans_info oti = { 0 };
1300 struct obdo *oa = NULL;
1301 int lsm_size;
1302 int rc = 0;
1303 struct lov_stripe_md *lsm = NULL, *lsm2;
1304
1305 oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
1306 if (!oa)
1307 return -ENOMEM;
1308
1309 lsm = ccc_inode_lsm_get(inode);
1310 if (!lsm_has_objects(lsm)) {
1311 rc = -ENOENT;
1312 goto out;
1313 }
1314
1315 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1316 (lsm->lsm_stripe_count));
1317
1318 lsm2 = libcfs_kvzalloc(lsm_size, GFP_NOFS);
1319 if (!lsm2) {
1320 rc = -ENOMEM;
1321 goto out;
1322 }
1323
1324 oa->o_oi = *oi;
1325 oa->o_nlink = ost_idx;
1326 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1327 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1328 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1329 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1330 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1331 memcpy(lsm2, lsm, lsm_size);
1332 ll_inode_size_lock(inode);
1333 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1334 ll_inode_size_unlock(inode);
1335
1336 kvfree(lsm2);
1337 goto out;
1338 out:
1339 ccc_inode_lsm_put(inode, lsm);
1340 kmem_cache_free(obdo_cachep, oa);
1341 return rc;
1342 }
1343
1344 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1345 {
1346 struct ll_recreate_obj ucreat;
1347 struct ost_id oi;
1348
1349 if (!capable(CFS_CAP_SYS_ADMIN))
1350 return -EPERM;
1351
1352 if (copy_from_user(&ucreat, (struct ll_recreate_obj __user *)arg,
1353 sizeof(ucreat)))
1354 return -EFAULT;
1355
1356 ostid_set_seq_mdt0(&oi);
1357 ostid_set_id(&oi, ucreat.lrc_id);
1358 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
1359 }
1360
1361 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1362 {
1363 struct lu_fid fid;
1364 struct ost_id oi;
1365 u32 ost_idx;
1366
1367 if (!capable(CFS_CAP_SYS_ADMIN))
1368 return -EPERM;
1369
1370 if (copy_from_user(&fid, (struct lu_fid __user *)arg, sizeof(fid)))
1371 return -EFAULT;
1372
1373 fid_to_ostid(&fid, &oi);
1374 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1375 return ll_lov_recreate(inode, &oi, ost_idx);
1376 }
1377
/*
 * Set the striping (LOV EA) of @inode by performing an open-by-fid on the
 * MDS with the user-supplied layout, then immediately releasing the open
 * handle.  Fails with -EEXIST if a layout is already set.
 *
 * \param flags    open flags carried in the intent (e.g. FMODE_WRITE)
 * \param lum      user layout request (already copied into kernel space)
 * \param lum_size size of @lum in bytes
 *
 * \retval 0 on success, negative errno otherwise
 */
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
			     __u64 flags, struct lov_user_md *lum,
			     int lum_size)
{
	struct lov_stripe_md *lsm = NULL;
	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
	int rc = 0;

	/* An existing layout cannot be replaced by this path. */
	lsm = ccc_inode_lsm_get(inode);
	if (lsm) {
		ccc_inode_lsm_put(inode, lsm);
		CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
		       PFID(ll_inode2fid(inode)));
		rc = -EEXIST;
		goto out;
	}

	ll_inode_size_lock(inode);
	oit.it_flags |= MDS_OPEN_BY_FID;
	rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
	if (rc)
		goto out_unlock;
	/* intent succeeded; the server-side open status may still be bad */
	rc = oit.it_status;
	if (rc < 0)
		goto out_req_free;

	/* the open was only needed to establish the layout; close it */
	ll_release_openhandle(inode, &oit);

out_unlock:
	ll_inode_size_unlock(inode);
	ll_intent_release(&oit);
	/* NOTE(review): lsm is NULL on every path reaching here (the only
	 * non-NULL case returned via "out" above), so this put looks like a
	 * no-op — confirm ccc_inode_lsm_put() tolerates NULL.
	 */
	ccc_inode_lsm_put(inode, lsm);
out:
	return rc;
out_req_free:
	/* drop the request pinned by the failed open intent */
	ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request);
	goto out;
}
1416
/*
 * Fetch the raw LOV EA (striping descriptor) of @filename, a child of
 * directory @inode, directly from the MDS.
 *
 * On success *lmmp points into the reply buffer of *request (caller must
 * keep the request alive while using the EA and finish it afterwards),
 * and *lmm_size is the EA length.  The EA is byte-swapped to host endian
 * on big-endian machines.
 *
 * \retval 0 on success; -ENODATA if no EA; -EPROTO on bad magic;
 *         other negative errno on RPC failure
 */
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
			     struct lov_mds_md **lmmp, int *lmm_size,
			     struct ptlrpc_request **request)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct mdt_body *body;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *req = NULL;
	struct md_op_data *op_data;
	int rc, lmmsize;

	/* size the getattr reply buffer for the largest possible EA */
	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc)
		return rc;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
				     strlen(filename), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc < 0) {
		CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n",
		       filename, rc);
		goto out;
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);

	lmmsize = body->mbo_eadatasize;

	if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
	    lmmsize == 0) {
		rc = -ENODATA;
		goto out;
	}

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);

	/* the wire format is little endian, so compare against LE magics */
	if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
	    (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
		rc = -EPROTO;
		goto out;
	}

	/*
	 * This is coming from the MDS, so is probably in
	 * little endian. We convert it to host endian before
	 * passing it to userspace.
	 */
	if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) {
		int stripe_count;

		stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
		/* released files carry no object array to swab */
		if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
			stripe_count = 0;

		/* if function called for directory - we should
		 * avoid swab not existent lsm objects
		 */
		if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
			if (S_ISREG(body->mbo_mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
				 stripe_count);
		} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
			if (S_ISREG(body->mbo_mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
				 stripe_count);
		}
	}

out:
	/* outputs are valid even on error: lmm may be NULL, req is handed
	 * to the caller for ptlrpc_req_finished() in all cases
	 */
	*lmmp = lmm;
	*lmm_size = lmmsize;
	*request = req;
	return rc;
}
1501
1502 static int ll_lov_setea(struct inode *inode, struct file *file,
1503 unsigned long arg)
1504 {
1505 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1506 struct lov_user_md *lump;
1507 int lum_size = sizeof(struct lov_user_md) +
1508 sizeof(struct lov_user_ost_data);
1509 int rc;
1510
1511 if (!capable(CFS_CAP_SYS_ADMIN))
1512 return -EPERM;
1513
1514 lump = libcfs_kvzalloc(lum_size, GFP_NOFS);
1515 if (!lump)
1516 return -ENOMEM;
1517
1518 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1519 kvfree(lump);
1520 return -EFAULT;
1521 }
1522
1523 rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lump,
1524 lum_size);
1525 cl_lov_delay_create_clear(&file->f_flags);
1526
1527 kvfree(lump);
1528 return rc;
1529 }
1530
1531 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1532 unsigned long arg)
1533 {
1534 struct lov_user_md_v3 lumv3;
1535 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1536 struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
1537 struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
1538 int lum_size, rc;
1539 __u64 flags = FMODE_WRITE;
1540
1541 /* first try with v1 which is smaller than v3 */
1542 lum_size = sizeof(struct lov_user_md_v1);
1543 if (copy_from_user(lumv1, lumv1p, lum_size))
1544 return -EFAULT;
1545
1546 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1547 lum_size = sizeof(struct lov_user_md_v3);
1548 if (copy_from_user(&lumv3, lumv3p, lum_size))
1549 return -EFAULT;
1550 }
1551
1552 rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lumv1,
1553 lum_size);
1554 cl_lov_delay_create_clear(&file->f_flags);
1555 if (rc == 0) {
1556 struct lov_stripe_md *lsm;
1557 __u32 gen;
1558
1559 put_user(0, &lumv1p->lmm_stripe_count);
1560
1561 ll_layout_refresh(inode, &gen);
1562 lsm = ccc_inode_lsm_get(inode);
1563 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1564 0, lsm, (void __user *)arg);
1565 ccc_inode_lsm_put(inode, lsm);
1566 }
1567 return rc;
1568 }
1569
1570 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1571 {
1572 struct lov_stripe_md *lsm;
1573 int rc = -ENODATA;
1574
1575 lsm = ccc_inode_lsm_get(inode);
1576 if (lsm)
1577 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1578 lsm, (void __user *)arg);
1579 ccc_inode_lsm_put(inode, lsm);
1580 return rc;
1581 }
1582
/*
 * Take a Lustre group lock (gid = @arg) on behalf of this open file.
 * At most one group lock may be held per file descriptor; the gid 0 is
 * reserved as "no lock".
 *
 * \retval 0 on success; -EINVAL on bad gid or lock already held;
 *         -EOPNOTSUPP if locking is disabled for this file
 */
static int
ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_grouplock grouplock;
	int rc;

	if (arg == 0) {
		CWARN("group id for group lock must not be 0\n");
		return -EINVAL;
	}

	if (ll_file_nolock(file))
		return -EOPNOTSUPP;

	/* check under lli_lock that no lock is held yet on this fd */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		CWARN("group lock already existed with gid %lu\n",
		      fd->fd_grouplock.lg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}
	LASSERT(!fd->fd_grouplock.lg_lock);
	spin_unlock(&lli->lli_lock);

	/* the DLM enqueue may block, so it runs without lli_lock held */
	rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
	if (rc)
		return rc;

	/* re-check: another thread may have raced us while we enqueued */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		spin_unlock(&lli->lli_lock);
		CERROR("another thread just won the race\n");
		cl_put_grouplock(&grouplock);
		return -EINVAL;
	}

	fd->fd_flags |= LL_FILE_GROUP_LOCKED;
	fd->fd_grouplock = grouplock;
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
	return 0;
}
1629
/*
 * Release the group lock with gid @arg held by this open file.
 *
 * \retval 0 on success; -EINVAL if no lock is held or the gid mismatches
 */
static int ll_put_grouplock(struct inode *inode, struct file *file,
			    unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_grouplock grouplock;

	spin_lock(&lli->lli_lock);
	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
		spin_unlock(&lli->lli_lock);
		CWARN("no group lock held\n");
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.lg_lock);

	if (fd->fd_grouplock.lg_gid != arg) {
		CWARN("group lock %lu doesn't match current id %lu\n",
		      arg, fd->fd_grouplock.lg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}

	/* detach the lock from the fd under lli_lock, then release it
	 * outside the spinlock since cl_put_grouplock() may block
	 */
	grouplock = fd->fd_grouplock;
	memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
	fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
	spin_unlock(&lli->lli_lock);

	cl_put_grouplock(&grouplock);
	CDEBUG(D_INFO, "group lock %lu released\n", arg);
	return 0;
}
1661
1662 /**
1663 * Close inode open handle
1664 *
1665 * \param inode [in] inode in question
1666 * \param it [in,out] intent which contains open info and result
1667 *
1668 * \retval 0 success
1669 * \retval <0 failure
1670 */
1671 int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
1672 {
1673 struct obd_client_handle *och;
1674 int rc;
1675
1676 LASSERT(inode);
1677
1678 /* Root ? Do nothing. */
1679 if (is_root_inode(inode))
1680 return 0;
1681
1682 /* No open handle to close? Move away */
1683 if (!it_disposition(it, DISP_OPEN_OPEN))
1684 return 0;
1685
1686 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1687
1688 och = kzalloc(sizeof(*och), GFP_NOFS);
1689 if (!och) {
1690 rc = -ENOMEM;
1691 goto out;
1692 }
1693
1694 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1695
1696 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1697 inode, och, NULL);
1698 out:
1699 /* this one is in place of ll_file_open */
1700 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1701 ptlrpc_req_finished(it->it_request);
1702 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1703 }
1704 return rc;
1705 }
1706
/**
 * Get size for inode for which FIEMAP mapping is requested.
 * Make the FIEMAP get_info call and returns the result.
 *
 * @fiemap holds the request (flags, range, extent buffer) and receives
 * the mapped extents in place; @num_bytes is the total size of the
 * buffer including the extent array.
 */
static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
			size_t num_bytes)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct lov_stripe_md *lsm = NULL;
	struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
	__u32 vallen = num_bytes;
	int rc;

	/* Checks for fiemap flags */
	if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
		/* report back which flags we do support */
		fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
		return -EBADR;
	}

	/* Check for FIEMAP_FLAG_SYNC */
	if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
		rc = filemap_fdatawrite(inode->i_mapping);
		if (rc)
			return rc;
	}

	lsm = ccc_inode_lsm_get(inode);
	if (!lsm)
		return -ENOENT;

	/* If the stripe_count > 1 and the application does not understand
	 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
	 */
	if (lsm->lsm_stripe_count > 1 &&
	    !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) {
		rc = -EOPNOTSUPP;
		goto out;
	}

	fm_key.oa.o_oi = lsm->lsm_oi;
	fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;

	/* make sure we have an up-to-date size before deciding whether
	 * there is anything to map at all
	 */
	if (i_size_read(inode) == 0) {
		rc = ll_glimpse_size(inode);
		if (rc)
			goto out;
	}

	obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
	obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
	/* If filesize is 0, then there would be no objects for mapping */
	if (fm_key.oa.o_size == 0) {
		fiemap->fm_mapped_extents = 0;
		rc = 0;
		goto out;
	}

	/* the request half of the fiemap is part of the key sent down */
	memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));

	rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
			  fiemap, lsm);
	if (rc)
		CERROR("obd_get_info failed: rc = %d\n", rc);

out:
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
1775
1776 int ll_fid2path(struct inode *inode, void __user *arg)
1777 {
1778 struct obd_export *exp = ll_i2mdexp(inode);
1779 const struct getinfo_fid2path __user *gfin = arg;
1780 struct getinfo_fid2path *gfout;
1781 u32 pathlen;
1782 size_t outsize;
1783 int rc;
1784
1785 if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
1786 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1787 return -EPERM;
1788
1789 /* Only need to get the buflen */
1790 if (get_user(pathlen, &gfin->gf_pathlen))
1791 return -EFAULT;
1792
1793 if (pathlen > PATH_MAX)
1794 return -EINVAL;
1795
1796 outsize = sizeof(*gfout) + pathlen;
1797
1798 gfout = kzalloc(outsize, GFP_NOFS);
1799 if (!gfout)
1800 return -ENOMEM;
1801
1802 if (copy_from_user(gfout, arg, sizeof(*gfout))) {
1803 rc = -EFAULT;
1804 goto gf_free;
1805 }
1806
1807 /* Call mdc_iocontrol */
1808 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1809 if (rc != 0)
1810 goto gf_free;
1811
1812 if (copy_to_user(arg, gfout, outsize))
1813 rc = -EFAULT;
1814
1815 gf_free:
1816 kfree(gfout);
1817 return rc;
1818 }
1819
/*
 * FIEMAP ioctl front-end: size and allocate a kernel fiemap buffer from
 * the user's fm_extent_count, copy in the request (and the first extent,
 * which carries continuation state from a previous call), run the
 * mapping, and copy the header plus mapped extents back out.
 */
static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
{
	struct ll_user_fiemap *fiemap_s;
	size_t num_bytes, ret_bytes;
	unsigned int extent_count;
	int rc = 0;

	/* Get the extent count so we can calculate the size of
	 * required fiemap buffer
	 */
	if (get_user(extent_count,
		     &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
		return -EFAULT;

	/* reject counts whose buffer size would overflow size_t */
	if (extent_count >=
	    (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent))
		return -EINVAL;
	num_bytes = sizeof(*fiemap_s) + (extent_count *
					 sizeof(struct ll_fiemap_extent));

	fiemap_s = libcfs_kvzalloc(num_bytes, GFP_NOFS);
	if (!fiemap_s)
		return -ENOMEM;

	/* get the fiemap value */
	if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
			   sizeof(*fiemap_s))) {
		rc = -EFAULT;
		goto error;
	}

	/* If fm_extent_count is non-zero, read the first extent since
	 * it is used to calculate end_offset and device from previous
	 * fiemap call.
	 */
	if (extent_count) {
		if (copy_from_user(&fiemap_s->fm_extents[0],
				   (char __user *)arg + sizeof(*fiemap_s),
				   sizeof(struct ll_fiemap_extent))) {
			rc = -EFAULT;
			goto error;
		}
	}

	rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
	if (rc)
		goto error;

	/* copy back the header plus only the extents actually mapped */
	ret_bytes = sizeof(struct ll_user_fiemap);

	if (extent_count != 0)
		ret_bytes += (fiemap_s->fm_mapped_extents *
			      sizeof(struct ll_fiemap_extent));

	if (copy_to_user((void __user *)arg, fiemap_s, ret_bytes))
		rc = -EFAULT;

error:
	kvfree(fiemap_s);
	return rc;
}
1881
1882 /*
1883 * Read the data_version for inode.
1884 *
1885 * This value is computed using stripe object version on OST.
1886 * Version is computed using server side locking.
1887 *
1888 * @param sync if do sync on the OST side;
1889 * 0: no sync
1890 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1891 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1892 */
1893 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1894 {
1895 struct lov_stripe_md *lsm = NULL;
1896 struct ll_sb_info *sbi = ll_i2sbi(inode);
1897 struct obdo *obdo = NULL;
1898 int rc;
1899
1900 /* If no stripe, we consider version is 0. */
1901 lsm = ccc_inode_lsm_get(inode);
1902 if (!lsm_has_objects(lsm)) {
1903 *data_version = 0;
1904 CDEBUG(D_INODE, "No object for inode\n");
1905 rc = 0;
1906 goto out;
1907 }
1908
1909 obdo = kzalloc(sizeof(*obdo), GFP_NOFS);
1910 if (!obdo) {
1911 rc = -ENOMEM;
1912 goto out;
1913 }
1914
1915 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags);
1916 if (rc == 0) {
1917 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1918 rc = -EOPNOTSUPP;
1919 else
1920 *data_version = obdo->o_data_version;
1921 }
1922
1923 kfree(obdo);
1924 out:
1925 ccc_inode_lsm_put(inode, lsm);
1926 return rc;
1927 }
1928
/*
 * Trigger a HSM release request for the provided inode: take a write
 * lease, flush and record the latest data version and timestamps, then
 * close the lease handle with MDS_OPEN_RELEASE so the MDT drops the
 * file's OST objects.
 */
int ll_hsm_release(struct inode *inode)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct obd_client_handle *och = NULL;
	__u64 data_version = 0;
	int rc;

	CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
	       ll_get_fsname(inode->i_sb, NULL, 0),
	       PFID(&ll_i2info(inode)->lli_fid));

	/* an exclusive write lease guards against concurrent modification */
	och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
	if (IS_ERR(och)) {
		rc = PTR_ERR(och);
		goto out;
	}

	/* Grab latest data_version and [am]time values */
	rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
	if (rc != 0)
		goto out;

	/* nested env: we may already be inside a cl_env on this thread */
	env = cl_env_nested_get(&nest);
	if (IS_ERR(env)) {
		rc = PTR_ERR(env);
		goto out;
	}

	ll_merge_attr(env, inode);
	cl_env_nested_put(&nest, env);

	/* Release the file.
	 * NB: lease lock handle is released in mdc_hsm_release_pack() because
	 * we still need it to pack l_remote_handle to MDT.
	 */
	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
				       &data_version);
	/* ownership of och passed to the close above, even on failure */
	och = NULL;

out:
	if (och && !IS_ERR(och)) /* close the file */
		ll_lease_close(och, inode, NULL);

	return rc;
}
1978
/* Scratch state for ll_swap_layouts(); the *1/*2 pairs may be swapped to
 * process the two files in a canonical (fid) order.
 */
struct ll_swap_stack {
	struct iattr ia1, ia2;		/* saved [am]times to restore after swap */
	__u64 dv1, dv2;			/* expected data versions, if checked */
	struct inode *inode1, *inode2;	/* the two files being swapped */
	bool check_dv1, check_dv2;	/* whether to verify dv before swapping */
};
1985
1986 static int ll_swap_layouts(struct file *file1, struct file *file2,
1987 struct lustre_swap_layouts *lsl)
1988 {
1989 struct mdc_swap_layouts msl;
1990 struct md_op_data *op_data;
1991 __u32 gid;
1992 __u64 dv;
1993 struct ll_swap_stack *llss = NULL;
1994 int rc;
1995
1996 llss = kzalloc(sizeof(*llss), GFP_NOFS);
1997 if (!llss)
1998 return -ENOMEM;
1999
2000 llss->inode1 = file_inode(file1);
2001 llss->inode2 = file_inode(file2);
2002
2003 if (!S_ISREG(llss->inode2->i_mode)) {
2004 rc = -EINVAL;
2005 goto free;
2006 }
2007
2008 if (inode_permission(llss->inode1, MAY_WRITE) ||
2009 inode_permission(llss->inode2, MAY_WRITE)) {
2010 rc = -EPERM;
2011 goto free;
2012 }
2013
2014 if (llss->inode2->i_sb != llss->inode1->i_sb) {
2015 rc = -EXDEV;
2016 goto free;
2017 }
2018
2019 /* we use 2 bool because it is easier to swap than 2 bits */
2020 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2021 llss->check_dv1 = true;
2022
2023 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2024 llss->check_dv2 = true;
2025
2026 /* we cannot use lsl->sl_dvX directly because we may swap them */
2027 llss->dv1 = lsl->sl_dv1;
2028 llss->dv2 = lsl->sl_dv2;
2029
2030 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2031 if (rc == 0) /* same file, done! */ {
2032 rc = 0;
2033 goto free;
2034 }
2035
2036 if (rc < 0) { /* sequentialize it */
2037 swap(llss->inode1, llss->inode2);
2038 swap(file1, file2);
2039 swap(llss->dv1, llss->dv2);
2040 swap(llss->check_dv1, llss->check_dv2);
2041 }
2042
2043 gid = lsl->sl_gid;
2044 if (gid != 0) { /* application asks to flush dirty cache */
2045 rc = ll_get_grouplock(llss->inode1, file1, gid);
2046 if (rc < 0)
2047 goto free;
2048
2049 rc = ll_get_grouplock(llss->inode2, file2, gid);
2050 if (rc < 0) {
2051 ll_put_grouplock(llss->inode1, file1, gid);
2052 goto free;
2053 }
2054 }
2055
2056 /* to be able to restore mtime and atime after swap
2057 * we need to first save them
2058 */
2059 if (lsl->sl_flags &
2060 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2061 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2062 llss->ia1.ia_atime = llss->inode1->i_atime;
2063 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2064 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2065 llss->ia2.ia_atime = llss->inode2->i_atime;
2066 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2067 }
2068
2069 /* ultimate check, before swapping the layouts we check if
2070 * dataversion has changed (if requested)
2071 */
2072 if (llss->check_dv1) {
2073 rc = ll_data_version(llss->inode1, &dv, 0);
2074 if (rc)
2075 goto putgl;
2076 if (dv != llss->dv1) {
2077 rc = -EAGAIN;
2078 goto putgl;
2079 }
2080 }
2081
2082 if (llss->check_dv2) {
2083 rc = ll_data_version(llss->inode2, &dv, 0);
2084 if (rc)
2085 goto putgl;
2086 if (dv != llss->dv2) {
2087 rc = -EAGAIN;
2088 goto putgl;
2089 }
2090 }
2091
2092 /* struct md_op_data is used to send the swap args to the mdt
2093 * only flags is missing, so we use struct mdc_swap_layouts
2094 * through the md_op_data->op_data
2095 */
2096 /* flags from user space have to be converted before they are send to
2097 * server, no flag is sent today, they are only used on the client
2098 */
2099 msl.msl_flags = 0;
2100 rc = -ENOMEM;
2101 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2102 0, LUSTRE_OPC_ANY, &msl);
2103 if (IS_ERR(op_data)) {
2104 rc = PTR_ERR(op_data);
2105 goto free;
2106 }
2107
2108 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2109 sizeof(*op_data), op_data, NULL);
2110 ll_finish_md_op_data(op_data);
2111
2112 putgl:
2113 if (gid != 0) {
2114 ll_put_grouplock(llss->inode2, file2, gid);
2115 ll_put_grouplock(llss->inode1, file1, gid);
2116 }
2117
2118 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2119 if (rc != 0)
2120 goto free;
2121
2122 /* clear useless flags */
2123 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2124 llss->ia1.ia_valid &= ~ATTR_MTIME;
2125 llss->ia2.ia_valid &= ~ATTR_MTIME;
2126 }
2127
2128 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2129 llss->ia1.ia_valid &= ~ATTR_ATIME;
2130 llss->ia2.ia_valid &= ~ATTR_ATIME;
2131 }
2132
2133 /* update time if requested */
2134 rc = 0;
2135 if (llss->ia2.ia_valid != 0) {
2136 inode_lock(llss->inode1);
2137 rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
2138 inode_unlock(llss->inode1);
2139 }
2140
2141 if (llss->ia1.ia_valid != 0) {
2142 int rc1;
2143
2144 inode_lock(llss->inode2);
2145 rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
2146 inode_unlock(llss->inode2);
2147 if (rc == 0)
2148 rc = rc1;
2149 }
2150
2151 free:
2152 kfree(llss);
2153
2154 return rc;
2155 }
2156
2157 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2158 {
2159 struct md_op_data *op_data;
2160 int rc;
2161
2162 /* Detect out-of range masks */
2163 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2164 return -EINVAL;
2165
2166 /* Non-root users are forbidden to set or clear flags which are
2167 * NOT defined in HSM_USER_MASK.
2168 */
2169 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2170 !capable(CFS_CAP_SYS_ADMIN))
2171 return -EPERM;
2172
2173 /* Detect out-of range archive id */
2174 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2175 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2176 return -EINVAL;
2177
2178 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2179 LUSTRE_OPC_ANY, hss);
2180 if (IS_ERR(op_data))
2181 return PTR_ERR(op_data);
2182
2183 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2184 sizeof(*op_data), op_data, NULL);
2185
2186 ll_finish_md_op_data(op_data);
2187
2188 return rc;
2189 }
2190
/*
 * Import an already-archived file into the HSM namespace.
 *
 * First marks the file HS_ARCHIVED|HS_EXISTS|HS_RELEASED on the MDT,
 * then restores the attributes of the archived copy (mode, uid/gid,
 * size, a/mtime) supplied by userspace in @hui via ll_setattr_raw().
 *
 * \param inode	regular file being imported
 * \param file	open file handle, used for the setattr dentry
 * \param hui	user-provided attributes of the archived copy
 *
 * \retval 0 on success, negative errno on failure
 */
static int ll_hsm_import(struct inode *inode, struct file *file,
			 struct hsm_user_import *hui)
{
	struct hsm_state_set *hss = NULL;
	struct iattr *attr = NULL;
	int rc;

	/* only regular files carry HSM state */
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* set HSM flags */
	hss = kzalloc(sizeof(*hss), GFP_NOFS);
	if (!hss)
		return -ENOMEM;

	hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
	hss->hss_archive_id = hui->hui_archive_id;
	hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
	rc = ll_hsm_state_set(inode, hss);
	if (rc != 0)
		goto free_hss;

	attr = kzalloc(sizeof(*attr), GFP_NOFS);
	if (!attr) {
		rc = -ENOMEM;
		goto free_hss;
	}

	/* restore saved attributes; force the regular-file type bit */
	attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
	attr->ia_mode |= S_IFREG;
	attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
	attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
	attr->ia_size = hui->hui_size;
	attr->ia_mtime.tv_sec = hui->hui_mtime;
	attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
	attr->ia_atime.tv_sec = hui->hui_atime;
	attr->ia_atime.tv_nsec = hui->hui_atime_ns;

	attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
			 ATTR_UID | ATTR_GID |
			 ATTR_MTIME | ATTR_MTIME_SET |
			 ATTR_ATIME | ATTR_ATIME_SET;

	inode_lock(inode);

	rc = ll_setattr_raw(file->f_path.dentry, attr, true);
	if (rc == -ENODATA)
		/* released file has no data objects: not an error here */
		rc = 0;

	inode_unlock(inode);

	kfree(attr);
free_hss:
	kfree(hss);
	return rc;
}
2247
/*
 * Main ioctl entry point for regular files.
 *
 * Dispatches the Lustre-private ioctls (striping, layout swap, HSM,
 * leases, FID/path translation, group locks, ...) and forwards anything
 * unrecognized first to the registered ioctl hooks (ll_iocontrol_call)
 * and finally to the data (OSC/LOV) export.
 */
static long
ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(file);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	int flags, rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n",
	       PFID(ll_inode2fid(inode)), inode, cmd);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);

	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
		return -ENOTTY;

	switch (cmd) {
	case LL_IOC_GETFLAGS:
		/* Get the current value of the file flags */
		return put_user(fd->fd_flags, (int __user *)arg);
	case LL_IOC_SETFLAGS:
	case LL_IOC_CLRFLAGS:
		/* Set or clear specific file flags */
		/* XXX This probably needs checks to ensure the flags are
		 * not abused, and to handle any flag side effects.
		 */
		if (get_user(flags, (int __user *)arg))
			return -EFAULT;

		if (cmd == LL_IOC_SETFLAGS) {
			/* lockless i/o only makes sense for O_DIRECT */
			if ((flags & LL_FILE_IGNORE_LOCK) &&
			    !(file->f_flags & O_DIRECT)) {
				CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
				       current->comm);
				return -EINVAL;
			}

			fd->fd_flags |= flags;
		} else {
			fd->fd_flags &= ~flags;
		}
		return 0;
	case LL_IOC_LOV_SETSTRIPE:
		return ll_lov_setstripe(inode, file, arg);
	case LL_IOC_LOV_SETEA:
		return ll_lov_setea(inode, file, arg);
	case LL_IOC_LOV_SWAP_LAYOUTS: {
		struct file *file2;
		struct lustre_swap_layouts lsl;

		if (copy_from_user(&lsl, (char __user *)arg,
				   sizeof(struct lustre_swap_layouts)))
			return -EFAULT;

		/* both files must be opened for write to swap layouts */
		if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
			return -EPERM;

		file2 = fget(lsl.sl_fd);
		if (!file2)
			return -EBADF;

		rc = -EPERM;
		if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
			rc = ll_swap_layouts(file, file2, &lsl);
		fput(file2);
		return rc;
	}
	case LL_IOC_LOV_GETSTRIPE:
		return ll_lov_getstripe(inode, arg);
	case LL_IOC_RECREATE_OBJ:
		return ll_lov_recreate_obj(inode, arg);
	case LL_IOC_RECREATE_FID:
		return ll_lov_recreate_fid(inode, arg);
	case FSFILT_IOC_FIEMAP:
		return ll_ioctl_fiemap(inode, arg);
	case FSFILT_IOC_GETFLAGS:
	case FSFILT_IOC_SETFLAGS:
		return ll_iocontrol(inode, file, cmd, arg);
	case FSFILT_IOC_GETVERSION_OLD:
	case FSFILT_IOC_GETVERSION:
		return put_user(inode->i_generation, (int __user *)arg);
	case LL_IOC_GROUP_LOCK:
		return ll_get_grouplock(inode, file, arg);
	case LL_IOC_GROUP_UNLOCK:
		return ll_put_grouplock(inode, file, arg);
	case IOC_OBD_STATFS:
		return ll_obd_statfs(inode, (void __user *)arg);

	/* We need to special case any other ioctls we want to handle,
	 * to send them to the MDS/OST as appropriate and to properly
	 * network encode the arg field.
	case FSFILT_IOC_SETVERSION_OLD:
	case FSFILT_IOC_SETVERSION:
	*/
	case LL_IOC_FLUSHCTX:
		return ll_flush_ctx(inode);
	case LL_IOC_PATH2FID: {
		if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
				 sizeof(struct lu_fid)))
			return -EFAULT;

		return 0;
	}
	case OBD_IOC_FID2PATH:
		return ll_fid2path(inode, (void __user *)arg);
	case LL_IOC_DATA_VERSION: {
		struct ioc_data_version idv;
		int rc;

		if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
			return -EFAULT;

		/* only the read/write flush flags are honored */
		idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
		rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
		if (rc == 0 && copy_to_user((char __user *)arg, &idv,
					    sizeof(idv)))
			return -EFAULT;

		return rc;
	}

	case LL_IOC_GET_MDTIDX: {
		int mdtidx;

		mdtidx = ll_get_mdt_idx(inode);
		if (mdtidx < 0)
			return mdtidx;

		if (put_user(mdtidx, (int __user *)arg))
			return -EFAULT;

		return 0;
	}
	case OBD_IOC_GETDTNAME:
	case OBD_IOC_GETMDNAME:
		return ll_get_obd_name(inode, cmd, arg);
	case LL_IOC_HSM_STATE_GET: {
		struct md_op_data *op_data;
		struct hsm_user_state *hus;
		int rc;

		hus = kzalloc(sizeof(*hus), GFP_NOFS);
		if (!hus)
			return -ENOMEM;

		/* @hus is filled by the MDC as the op_data payload */
		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hus);
		if (IS_ERR(op_data)) {
			kfree(hus);
			return PTR_ERR(op_data);
		}

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
				   op_data, NULL);

		if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
			rc = -EFAULT;

		ll_finish_md_op_data(op_data);
		kfree(hus);
		return rc;
	}
	case LL_IOC_HSM_STATE_SET: {
		struct hsm_state_set *hss;
		int rc;

		hss = memdup_user((char __user *)arg, sizeof(*hss));
		if (IS_ERR(hss))
			return PTR_ERR(hss);

		rc = ll_hsm_state_set(inode, hss);

		kfree(hss);
		return rc;
	}
	case LL_IOC_HSM_ACTION: {
		struct md_op_data *op_data;
		struct hsm_current_action *hca;
		int rc;

		hca = kzalloc(sizeof(*hca), GFP_NOFS);
		if (!hca)
			return -ENOMEM;

		/* @hca is filled by the MDC as the op_data payload */
		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hca);
		if (IS_ERR(op_data)) {
			kfree(hca);
			return PTR_ERR(op_data);
		}

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
				   op_data, NULL);

		if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
			rc = -EFAULT;

		ll_finish_md_op_data(op_data);
		kfree(hca);
		return rc;
	}
	case LL_IOC_SET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct obd_client_handle *och = NULL;
		bool lease_broken;
		fmode_t mode = 0;

		switch (arg) {
		case F_WRLCK:
			if (!(file->f_mode & FMODE_WRITE))
				return -EPERM;
			mode = FMODE_WRITE;
			break;
		case F_RDLCK:
			if (!(file->f_mode & FMODE_READ))
				return -EPERM;
			mode = FMODE_READ;
			break;
		case F_UNLCK:
			/* detach the lease handle from the fd under the
			 * och mutex, then close it outside the lock
			 */
			mutex_lock(&lli->lli_och_mutex);
			if (fd->fd_lease_och) {
				och = fd->fd_lease_och;
				fd->fd_lease_och = NULL;
			}
			mutex_unlock(&lli->lli_och_mutex);

			if (och) {
				mode = och->och_flags &
				       (FMODE_READ | FMODE_WRITE);
				rc = ll_lease_close(och, inode, &lease_broken);
				if (rc == 0 && lease_broken)
					mode = 0;
			} else {
				rc = -ENOLCK;
			}

			/* return the type of lease or error */
			return rc < 0 ? rc : (int)mode;
		default:
			return -EINVAL;
		}

		CDEBUG(D_INODE, "Set lease with mode %d\n", mode);

		/* apply for lease */
		och = ll_lease_open(inode, file, mode, 0);
		if (IS_ERR(och))
			return PTR_ERR(och);

		rc = 0;
		mutex_lock(&lli->lli_och_mutex);
		if (!fd->fd_lease_och) {
			fd->fd_lease_och = och;
			och = NULL;
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (och) {
			/* impossible now that only excl is supported for now */
			ll_lease_close(och, inode, &lease_broken);
			rc = -EBUSY;
		}
		return rc;
	}
	case LL_IOC_GET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ldlm_lock *lock = NULL;

		rc = 0;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och) {
			struct obd_client_handle *och = fd->fd_lease_och;

			/* report the mode only while the lease DLM lock is
			 * still granted and not being cancelled
			 */
			lock = ldlm_handle2lock(&och->och_lease_handle);
			if (lock) {
				lock_res_and_lock(lock);
				if (!ldlm_is_cancel(lock))
					rc = och->och_flags &
					     (FMODE_READ | FMODE_WRITE);
				unlock_res_and_lock(lock);
				LDLM_LOCK_PUT(lock);
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		return rc;
	}
	case LL_IOC_HSM_IMPORT: {
		struct hsm_user_import *hui;

		hui = memdup_user((void __user *)arg, sizeof(*hui));
		if (IS_ERR(hui))
			return PTR_ERR(hui);

		rc = ll_hsm_import(inode, file, hui);

		kfree(hui);
		return rc;
	}
	default: {
		int err;

		/* registered third-party handlers get first refusal */
		if (ll_iocontrol_call(inode, file, cmd, arg, &err) ==
		    LLIOC_STOP)
			return err;

		return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
				     (void __user *)arg);
	}
	}
}
2556
2557 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2558 {
2559 struct inode *inode = file_inode(file);
2560 loff_t retval, eof = 0;
2561
2562 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2563 (origin == SEEK_CUR) ? file->f_pos : 0);
2564 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2565 PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
2566 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2567
2568 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2569 retval = ll_glimpse_size(inode);
2570 if (retval != 0)
2571 return retval;
2572 eof = i_size_read(inode);
2573 }
2574
2575 retval = generic_file_llseek_size(file, offset, origin,
2576 ll_file_maxbytes(inode), eof);
2577 return retval;
2578 }
2579
/*
 * ->flush() handler, invoked on each close() of a file descriptor.
 *
 * Does not push dirty pages; it only harvests asynchronous writeback
 * errors recorded on the inode and its cl objects and surfaces them
 * once as -EIO, unless the writer has already been notified.
 */
static int ll_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	int rc, err;

	LASSERT(!S_ISDIR(inode->i_mode));

	/* catch async errors that were recorded back when async writeback
	 * failed for pages in this mapping.
	 */
	rc = lli->lli_async_rc;
	lli->lli_async_rc = 0;
	if (lli->lli_clob) {
		err = lov_read_and_clear_async_rc(lli->lli_clob);
		if (!rc)
			rc = err;
	}

	/* The application has been told about write failure already.
	 * Do not report failure again.
	 */
	if (fd->fd_write_failed)
		return 0;
	return rc ? -EIO : 0;
}
2607
/**
 * Called to make sure a portion of file has been written out.
 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
 *
 * \param inode		inode whose byte range is flushed
 * \param start		first byte of the range (inclusive)
 * \param end		last byte of the range (inclusive)
 * \param mode		one of the CL_FSYNC_* flush modes
 * \param ignore_layout	non-zero to run the io regardless of layout lock
 *
 * Return how many pages have been written.
 */
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
		       enum cl_fsync_mode mode, int ignore_layout)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct cl_io *io;
	struct cl_fsync_io *fio;
	int result;

	if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
	    mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
		return -EINVAL;

	/* nested env: this path may be entered from within another cl_io */
	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		return PTR_ERR(env);

	io = vvp_env_thread_io(env);
	io->ci_obj = ll_i2info(inode)->lli_clob;
	io->ci_ignore_layout = ignore_layout;

	/* initialize parameters for sync */
	fio = &io->u.ci_fsync;
	fio->fi_start = start;
	fio->fi_end = end;
	fio->fi_fid = ll_inode2fid(inode);
	fio->fi_mode = mode;
	fio->fi_nr_written = 0;

	if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
		result = cl_io_loop(env, io);
	else
		result = io->ci_result;
	/* on success report the page count accumulated by the fsync io */
	if (result == 0)
		result = fio->fi_nr_written;
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);

	return result;
}
2654
/*
 * ->fsync() handler.
 *
 * Flushes the page cache for [start, end], collects async writeback
 * errors, syncs the metadata on the MDS and finally forces the data out
 * to the OSTs with CL_FSYNC_ALL.  fd_write_failed tracks whether the
 * application has been informed of a write error (see ll_flush()).
 */
int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file_inode(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ptlrpc_request *req;
	int rc, err;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
	       PFID(ll_inode2fid(inode)), inode);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	inode_lock(inode);

	/* catch async errors that were recorded back when async writeback
	 * failed for pages in this mapping.
	 */
	if (!S_ISDIR(inode->i_mode)) {
		err = lli->lli_async_rc;
		lli->lli_async_rc = 0;
		if (rc == 0)
			rc = err;
		err = lov_read_and_clear_async_rc(lli->lli_clob);
		if (rc == 0)
			rc = err;
	}

	/* sync metadata on the MDS */
	err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
	if (!rc)
		rc = err;
	if (!err)
		ptlrpc_req_finished(req);

	if (S_ISREG(inode->i_mode)) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		/* force data out to the OSTs and remember the outcome */
		err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
		if (rc == 0 && err < 0)
			rc = err;
		if (rc < 0)
			fd->fd_write_failed = true;
		else
			fd->fd_write_failed = false;
	}

	inode_unlock(inode);
	return rc;
}
2703
/*
 * ->lock()/->flock() handler.
 *
 * Translates a VFS posix/flock request into an LDLM_FLOCK enqueue on
 * the MDS, then mirrors a successful grant into the local VFS lock
 * tables via locks_lock_file_wait().  If the local step fails, the
 * server-side lock is released again by enqueueing an LCK_NL request.
 */
static int
ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
{
	struct inode *inode = file_inode(file);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_enqueue_info einfo = {
		.ei_type	= LDLM_FLOCK,
		.ei_cb_cp	= ldlm_flock_completion_ast,
		.ei_cbdata	= file_lock,
	};
	struct md_op_data *op_data;
	struct lustre_handle lockh = {0};
	ldlm_policy_data_t flock = { {0} };
	int fl_type = file_lock->fl_type;
	__u64 flags = 0;
	int rc;
	int rc2 = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
	       PFID(ll_inode2fid(inode)), file_lock);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);

	/* flock requests only arrive as set-lock commands */
	if (file_lock->fl_flags & FL_FLOCK)
		LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
	else if (!(file_lock->fl_flags & FL_POSIX))
		return -EINVAL;

	flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
	flock.l_flock.pid = file_lock->fl_pid;
	flock.l_flock.start = file_lock->fl_start;
	flock.l_flock.end = file_lock->fl_end;

	/* Somewhat ugly workaround for svc lockd.
	 * lockd installs custom fl_lmops->lm_compare_owner that checks
	 * for the fl_owner to be the same (which it always is on local node
	 * I guess between lockd processes) and then compares pid.
	 * As such we assign pid to the owner field to make it all work,
	 * conflict with normal locks is unlikely since pid space and
	 * pointer space for current->files are not intersecting
	 */
	if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
		flock.l_flock.owner = (unsigned long)file_lock->fl_pid;

	/* map the posix lock type onto an ldlm mode */
	switch (fl_type) {
	case F_RDLCK:
		einfo.ei_mode = LCK_PR;
		break;
	case F_UNLCK:
		/* An unlock request may or may not have any relation to
		 * existing locks so we may not be able to pass a lock handle
		 * via a normal ldlm_lock_cancel() request. The request may even
		 * unlock a byte range in the middle of an existing lock. In
		 * order to process an unlock request we need all of the same
		 * information that is given with a normal read or write record
		 * lock request. To avoid creating another ldlm unlock (cancel)
		 * message we'll treat a LCK_NL flock request as an unlock.
		 */
		einfo.ei_mode = LCK_NL;
		break;
	case F_WRLCK:
		einfo.ei_mode = LCK_PW;
		break;
	default:
		CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
		return -ENOTSUPP;
	}

	/* map the fcntl command onto enqueue flags */
	switch (cmd) {
	case F_SETLKW:
#ifdef F_SETLKW64
	case F_SETLKW64:
#endif
		flags = 0;
		break;
	case F_SETLK:
#ifdef F_SETLK64
	case F_SETLK64:
#endif
		flags = LDLM_FL_BLOCK_NOWAIT;
		break;
	case F_GETLK:
#ifdef F_GETLK64
	case F_GETLK64:
#endif
		flags = LDLM_FL_TEST_LOCK;
		break;
	default:
		CERROR("unknown fcntl lock command: %d\n", cmd);
		return -EINVAL;
	}

	/*
	 * Save the old mode so that if the mode in the lock changes we
	 * can decrement the appropriate reader or writer refcount.
	 */
	file_lock->fl_type = einfo.ei_mode;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
	       PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
	       einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);

	rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
			flags);

	/* Restore the file lock type if not TEST lock. */
	if (!(flags & LDLM_FL_TEST_LOCK))
		file_lock->fl_type = fl_type;

	if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
	    !(flags & LDLM_FL_TEST_LOCK))
		rc2 = locks_lock_file_wait(file, file_lock);

	if (rc2 && file_lock->fl_type != F_UNLCK) {
		/* local bookkeeping failed: drop the server lock again by
		 * enqueueing the equivalent unlock (LCK_NL) request
		 */
		einfo.ei_mode = LCK_NL;
		md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
			   &lockh, flags);
		rc = rc2;
	}

	ll_finish_md_op_data(op_data);

	return rc;
}
2833
/*
 * Look up the FID of entry @name (length @namelen) in directory @parent
 * by issuing a getattr-by-name RPC to the MDS.
 *
 * \param parent	directory inode to search in
 * \param name		entry name (not necessarily NUL-terminated)
 * \param namelen	length of @name in bytes
 * \param fid [out]	filled with the child FID when non-NULL
 *
 * \retval 0 on success, negative errno on failure
 */
int ll_get_fid_by_name(struct inode *parent, const char *name,
		       int namelen, struct lu_fid *fid)
{
	struct md_op_data *op_data = NULL;
	struct ptlrpc_request *req;
	struct mdt_body *body;
	int rc;

	op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	/* only the FID is needed from the reply */
	op_data->op_valid = OBD_MD_FLID;
	rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc < 0)
		return rc;

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	if (!body) {
		rc = -EFAULT;
		goto out_req;
	}
	if (fid)
		*fid = body->mbo_fid1;
out_req:
	ptlrpc_req_finished(req);
	return rc;
}
2864
2865 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2866 const char *name, int namelen)
2867 {
2868 struct ptlrpc_request *request = NULL;
2869 struct inode *child_inode = NULL;
2870 struct dentry *dchild = NULL;
2871 struct md_op_data *op_data;
2872 struct qstr qstr;
2873 int rc;
2874
2875 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%d\n",
2876 name, PFID(ll_inode2fid(parent)), mdtidx);
2877
2878 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2879 0, LUSTRE_OPC_ANY, NULL);
2880 if (IS_ERR(op_data))
2881 return PTR_ERR(op_data);
2882
2883 /* Get child FID first */
2884 qstr.hash = full_name_hash(parent, name, namelen);
2885 qstr.name = name;
2886 qstr.len = namelen;
2887 dchild = d_lookup(file_dentry(file), &qstr);
2888 if (dchild && dchild->d_inode) {
2889 op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
2890 if (dchild->d_inode) {
2891 child_inode = igrab(dchild->d_inode);
2892 ll_invalidate_aliases(child_inode);
2893 }
2894 dput(dchild);
2895 } else {
2896 rc = ll_get_fid_by_name(parent, name, namelen,
2897 &op_data->op_fid3);
2898 if (rc)
2899 goto out_free;
2900 }
2901
2902 if (!fid_is_sane(&op_data->op_fid3)) {
2903 CERROR("%s: migrate %s, but fid "DFID" is insane\n",
2904 ll_get_fsname(parent->i_sb, NULL, 0), name,
2905 PFID(&op_data->op_fid3));
2906 rc = -EINVAL;
2907 goto out_free;
2908 }
2909
2910 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2911 if (rc < 0)
2912 goto out_free;
2913
2914 if (rc == mdtidx) {
2915 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2916 PFID(&op_data->op_fid3), mdtidx);
2917 rc = 0;
2918 goto out_free;
2919 }
2920
2921 op_data->op_mds = mdtidx;
2922 op_data->op_cli_flags = CLI_MIGRATE;
2923 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
2924 namelen, name, namelen, &request);
2925 if (!rc)
2926 ll_update_times(request, parent);
2927
2928 ptlrpc_req_finished(request);
2929
2930 out_free:
2931 if (child_inode) {
2932 clear_nlink(child_inode);
2933 iput(child_inode);
2934 }
2935
2936 ll_finish_md_op_data(op_data);
2937 return rc;
2938 }
2939
2940 static int
2941 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2942 {
2943 return -ENOSYS;
2944 }
2945
2946 /**
2947 * test if some locks matching bits and l_req_mode are acquired
2948 * - bits can be in different locks
2949 * - if found clear the common lock bits in *bits
2950 * - the bits not found, are kept in *bits
2951 * \param inode [IN]
2952 * \param bits [IN] searched lock bits [IN]
2953 * \param l_req_mode [IN] searched lock mode
2954 * \retval boolean, true iff all bits are found
2955 */
2956 int ll_have_md_lock(struct inode *inode, __u64 *bits,
2957 enum ldlm_mode l_req_mode)
2958 {
2959 struct lustre_handle lockh;
2960 ldlm_policy_data_t policy;
2961 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
2962 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
2963 struct lu_fid *fid;
2964 __u64 flags;
2965 int i;
2966
2967 if (!inode)
2968 return 0;
2969
2970 fid = &ll_i2info(inode)->lli_fid;
2971 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2972 ldlm_lockname[mode]);
2973
2974 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2975 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2976 policy.l_inodebits.bits = *bits & (1 << i);
2977 if (policy.l_inodebits.bits == 0)
2978 continue;
2979
2980 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2981 &policy, mode, &lockh)) {
2982 struct ldlm_lock *lock;
2983
2984 lock = ldlm_handle2lock(&lockh);
2985 if (lock) {
2986 *bits &=
2987 ~(lock->l_policy_data.l_inodebits.bits);
2988 LDLM_LOCK_PUT(lock);
2989 } else {
2990 *bits &= ~policy.l_inodebits.bits;
2991 }
2992 }
2993 }
2994 return *bits == 0;
2995 }
2996
2997 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
2998 struct lustre_handle *lockh, __u64 flags,
2999 enum ldlm_mode mode)
3000 {
3001 ldlm_policy_data_t policy = { .l_inodebits = {bits} };
3002 struct lu_fid *fid;
3003 enum ldlm_mode rc;
3004
3005 fid = &ll_i2info(inode)->lli_fid;
3006 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3007
3008 rc = md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
3009 fid, LDLM_IBITS, &policy, mode, lockh);
3010
3011 return rc;
3012 }
3013
3014 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3015 {
3016 /* Already unlinked. Just update nlink and return success */
3017 if (rc == -ENOENT) {
3018 clear_nlink(inode);
3019 /* This path cannot be hit for regular files unless in
3020 * case of obscure races, so no need to validate size.
3021 */
3022 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3023 return 0;
3024 } else if (rc != 0) {
3025 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3026 "%s: revalidate FID "DFID" error: rc = %d\n",
3027 ll_get_fsname(inode->i_sb, NULL, 0),
3028 PFID(ll_inode2fid(inode)), rc);
3029 }
3030
3031 return rc;
3032 }
3033
/*
 * Refresh the cached metadata of @dentry's inode from the MDS if the
 * client does not already hold DLM locks covering @ibits.
 *
 * Two paths: with OBD_CONNECT_ATTRFID the server supports getattr by
 * FID through an intent lock; otherwise a plain md_getattr() is issued
 * only when no matching local lock exists.
 */
static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
{
	struct inode *inode = d_inode(dentry);
	struct ptlrpc_request *req = NULL;
	struct obd_export *exp;
	int rc = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n",
	       PFID(ll_inode2fid(inode)), inode, dentry);

	exp = ll_i2mdexp(inode);

	/* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
	 * But under CMD case, it caused some lock issues, should be fixed
	 * with new CMD ibits lock. See bug 12718
	 */
	if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
		struct lookup_intent oit = { .it_op = IT_GETATTR };
		struct md_op_data *op_data;

		/* a pure LOOKUP revalidation needs no attribute refresh */
		if (ibits == MDS_INODELOCK_LOOKUP)
			oit.it_op = IT_LOOKUP;

		/* Call getattr by fid, so do not provide name at all. */
		op_data = ll_prep_md_op_data(NULL, inode,
					     inode, NULL, 0, 0,
					     LUSTRE_OPC_ANY, NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		rc = md_intent_lock(exp, op_data, &oit, &req,
				    &ll_md_blocking_ast, 0);
		ll_finish_md_op_data(op_data);
		if (rc < 0) {
			rc = ll_inode_revalidate_fini(inode, rc);
			goto out;
		}

		rc = ll_revalidate_it_finish(req, &oit, inode);
		if (rc != 0) {
			ll_intent_release(&oit);
			goto out;
		}

		/* Unlinked? Unhash dentry, so it is not picked up later by
		 * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
		 * here to preserve get_cwd functionality on 2.6.
		 * Bug 10503
		 */
		if (!d_inode(dentry)->i_nlink) {
			spin_lock(&inode->i_lock);
			d_lustre_invalidate(dentry, 0);
			spin_unlock(&inode->i_lock);
		}

		ll_lookup_finish_locks(&oit, inode);
	} else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
		struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
		u64 valid = OBD_MD_FLGETATTR;
		struct md_op_data *op_data;
		int ealen = 0;

		/* for regular files also fetch the striping EA */
		if (S_ISREG(inode->i_mode)) {
			rc = ll_get_default_mdsize(sbi, &ealen);
			if (rc)
				return rc;
			valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
		}

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
					     0, ealen, LUSTRE_OPC_ANY,
					     NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		op_data->op_valid = valid;
		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
		ll_finish_md_op_data(op_data);
		if (rc) {
			rc = ll_inode_revalidate_fini(inode, rc);
			return rc;
		}

		rc = ll_prep_inode(&inode, req, NULL, NULL);
	}
out:
	ptlrpc_req_finished(req);
	return rc;
}
3123
3124 static int ll_merge_md_attr(struct inode *inode)
3125 {
3126 struct cl_attr attr = { 0 };
3127 int rc;
3128
3129 LASSERT(ll_i2info(inode)->lli_lsm_md);
3130 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3131 &attr);
3132 if (rc)
3133 return rc;
3134
3135 ll_i2info(inode)->lli_stripe_dir_size = attr.cat_size;
3136 ll_i2info(inode)->lli_stripe_dir_nlink = attr.cat_nlink;
3137
3138 ll_i2info(inode)->lli_atime = attr.cat_atime;
3139 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3140 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
3141
3142 return 0;
3143 }
3144
/*
 * Revalidate metadata and then bring the inode's size/timestamps up to
 * date: merge striped-dir attributes for directories, copy cached times
 * for non-regular files, and glimpse the size from the OSTs for regular
 * files (unless an HSM restore is running and the MDT size is current).
 */
static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
{
	struct inode *inode = d_inode(dentry);
	int rc;

	rc = __ll_inode_revalidate(dentry, ibits);
	if (rc != 0)
		return rc;

	/* if object isn't regular file, don't validate size */
	if (!S_ISREG(inode->i_mode)) {
		if (S_ISDIR(inode->i_mode) &&
		    ll_i2info(inode)->lli_lsm_md) {
			rc = ll_merge_md_attr(inode);
			if (rc)
				return rc;
		}

		LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
		LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
		LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
	} else {
		/* In case of restore, the MDT has the right size and has
		 * already send it back without granting the layout lock,
		 * inode is up-to-date so glimpse is useless.
		 * Also to glimpse we need the layout, in case of a running
		 * restore the MDT holds the layout lock so the glimpse will
		 * block up to the end of restore (getattr will block)
		 */
		if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
			rc = ll_glimpse_size(inode);
	}
	return rc;
}
3179
/*
 * ->getattr() handler: revalidate UPDATE|LOOKUP metadata, then fill
 * @stat from the inode.  Striped directories report the merged size and
 * nlink cached by ll_merge_md_attr(); 32-bit clients get a squashed ino.
 */
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
{
	struct inode *inode = d_inode(de);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);
	int res;

	res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
				      MDS_INODELOCK_LOOKUP);
	ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);

	if (res)
		return res;

	/* fault-injection point for getattr delay testing */
	OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);

	stat->dev = inode->i_sb->s_dev;
	if (ll_need_32bit_api(sbi))
		stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
	else
		stat->ino = inode->i_ino;
	stat->mode = inode->i_mode;
	stat->uid = inode->i_uid;
	stat->gid = inode->i_gid;
	stat->rdev = inode->i_rdev;
	stat->atime = inode->i_atime;
	stat->mtime = inode->i_mtime;
	stat->ctime = inode->i_ctime;
	stat->blksize = 1 << inode->i_blkbits;
	stat->blocks = inode->i_blocks;

	if (S_ISDIR(inode->i_mode) &&
	    ll_i2info(inode)->lli_lsm_md) {
		/* striped dir: report the merged cross-MDT values */
		stat->nlink = lli->lli_stripe_dir_nlink;
		stat->size = lli->lli_stripe_dir_size;
	} else {
		stat->nlink = inode->i_nlink;
		stat->size = i_size_read(inode);
	}

	return 0;
}
3222
3223 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3224 __u64 start, __u64 len)
3225 {
3226 int rc;
3227 size_t num_bytes;
3228 struct ll_user_fiemap *fiemap;
3229 unsigned int extent_count = fieinfo->fi_extents_max;
3230
3231 num_bytes = sizeof(*fiemap) + (extent_count *
3232 sizeof(struct ll_fiemap_extent));
3233 fiemap = libcfs_kvzalloc(num_bytes, GFP_NOFS);
3234
3235 if (!fiemap)
3236 return -ENOMEM;
3237
3238 fiemap->fm_flags = fieinfo->fi_flags;
3239 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3240 fiemap->fm_start = start;
3241 fiemap->fm_length = len;
3242 if (extent_count > 0 &&
3243 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3244 sizeof(struct ll_fiemap_extent)) != 0) {
3245 rc = -EFAULT;
3246 goto out;
3247 }
3248
3249 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3250
3251 fieinfo->fi_flags = fiemap->fm_flags;
3252 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3253 if (extent_count > 0 &&
3254 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3255 fiemap->fm_mapped_extents *
3256 sizeof(struct ll_fiemap_extent)) != 0) {
3257 rc = -EFAULT;
3258 goto out;
3259 }
3260
3261 out:
3262 kvfree(fiemap);
3263 return rc;
3264 }
3265
/*
 * ->get_acl() handler: hand out a reference on the POSIX ACL cached in
 * the ll_inode_info.  The VFS-level ACL cache is explicitly dropped so
 * that Lustre's own copy (kept coherent via DLM locks) stays
 * authoritative.
 */
struct posix_acl *ll_get_acl(struct inode *inode, int type)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct posix_acl *acl = NULL;

	spin_lock(&lli->lli_lock);
	/* VFS' acl_permission_check->check_acl will release the refcount */
	acl = posix_acl_dup(lli->lli_posix_acl);
#ifdef CONFIG_FS_POSIX_ACL
	forget_cached_acl(inode, type);
#endif
	spin_unlock(&lli->lli_lock);

	return acl;
}
3281
/*
 * ->permission() handler.
 *
 * Revalidates the root inode when needed, optionally squashes root's
 * fsuid/fsgid (and filesystem capabilities) per the server-pushed
 * root-squash configuration, then defers to generic_permission().
 * Cannot run in RCU-walk mode (revalidation may issue RPCs), hence the
 * -ECHILD for MAY_NOT_BLOCK.
 */
int ll_inode_permission(struct inode *inode, int mask)
{
	struct ll_sb_info *sbi;
	struct root_squash_info *squash;
	const struct cred *old_cred = NULL;
	struct cred *cred = NULL;
	bool squash_id = false;
	cfs_cap_t cap;
	int rc = 0;

	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	/* as root inode are NOT getting validated in lookup operation,
	 * need to do it before permission check.
	 */

	if (is_root_inode(inode)) {
		rc = __ll_inode_revalidate(inode->i_sb->s_root,
					   MDS_INODELOCK_LOOKUP);
		if (rc)
			return rc;
	}

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
	       PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);

	/* squash fsuid/fsgid if needed */
	sbi = ll_i2sbi(inode);
	squash = &sbi->ll_squash;
	if (unlikely(squash->rsi_uid &&
		     uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
		     !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
		squash_id = true;
	}

	if (squash_id) {
		CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
		       __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
		       squash->rsi_uid, squash->rsi_gid);

		/*
		 * update current process's credentials
		 * and FS capability
		 */
		cred = prepare_creds();
		if (!cred)
			return -ENOMEM;

		cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
		cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
		/* drop every filesystem-related capability as well */
		for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
			if ((1 << cap) & CFS_CAP_FS_MASK)
				cap_lower(cred->cap_effective, cap);
		}
		old_cred = override_creds(cred);
	}

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
	rc = generic_permission(inode, mask);

	/* restore current process's credentials and FS capability */
	if (squash_id) {
		revert_creds(old_cred);
		put_cred(cred);
	}

	return rc;
}
3351
/* -o localflock - only provides locally consistent flock locks */
/* default file ops: no .flock/.lock, so the VFS falls back to local
 * (single-client) POSIX/flock locking
 */
struct file_operations ll_file_operations = {
	.read_iter = ll_file_read_iter,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush
};
3365
/* file ops used with -o flock: cluster-coherent flock/POSIX locks handled
 * by ll_file_flock() via the DLM
 */
struct file_operations ll_file_operations_flock = {
	.read_iter = ll_file_read_iter,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_flock,
	.lock = ll_file_flock
};
3380
/* These are for -o noflock - to return ENOSYS on flock calls */
struct file_operations ll_file_operations_noflock = {
	.read_iter = ll_file_read_iter,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_noflock,
	.lock = ll_file_noflock
};
3396
/* inode ops for regular files; generic_*xattr route through ll_listxattr's
 * xattr handlers, permission checks go through ll_inode_permission above
 */
const struct inode_operations ll_file_inode_operations = {
	.setattr	= ll_setattr,
	.getattr	= ll_getattr,
	.permission	= ll_inode_permission,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ll_listxattr,
	.removexattr	= generic_removexattr,
	.fiemap		= ll_fiemap,
	.get_acl	= ll_get_acl,
};
3408
/* dynamic ioctl number support routines */
/* global registry of dynamically registered ioctl handlers */
static struct llioc_ctl_data {
	struct rw_semaphore ioc_sem;	/* protects ioc_head */
	struct list_head ioc_head;	/* list of struct llioc_data entries */
} llioc = {
	__RWSEM_INITIALIZER(llioc.ioc_sem),
	LIST_HEAD_INIT(llioc.ioc_head)
};
3417
/* one registered dynamic-ioctl handler plus the commands it accepts */
struct llioc_data {
	struct list_head iocd_list;	/* linkage into llioc.ioc_head */
	unsigned int iocd_size;		/* total size of this allocation */
	llioc_callback_t iocd_cb;	/* handler callback */
	unsigned int iocd_count;	/* number of entries in iocd_cmd[] */
	unsigned int iocd_cmd[0];	/* ioctl command numbers handled */
};
3425
3426 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3427 {
3428 unsigned int size;
3429 struct llioc_data *in_data = NULL;
3430
3431 if (!cb || !cmd || count > LLIOC_MAX_CMD || count < 0)
3432 return NULL;
3433
3434 size = sizeof(*in_data) + count * sizeof(unsigned int);
3435 in_data = kzalloc(size, GFP_NOFS);
3436 if (!in_data)
3437 return NULL;
3438
3439 in_data->iocd_size = size;
3440 in_data->iocd_cb = cb;
3441 in_data->iocd_count = count;
3442 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3443
3444 down_write(&llioc.ioc_sem);
3445 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3446 up_write(&llioc.ioc_sem);
3447
3448 return in_data;
3449 }
3450 EXPORT_SYMBOL(ll_iocontrol_register);
3451
3452 void ll_iocontrol_unregister(void *magic)
3453 {
3454 struct llioc_data *tmp;
3455
3456 if (!magic)
3457 return;
3458
3459 down_write(&llioc.ioc_sem);
3460 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3461 if (tmp == magic) {
3462 list_del(&tmp->iocd_list);
3463 up_write(&llioc.ioc_sem);
3464
3465 kfree(tmp);
3466 return;
3467 }
3468 }
3469 up_write(&llioc.ioc_sem);
3470
3471 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3472 }
3473 EXPORT_SYMBOL(ll_iocontrol_unregister);
3474
3475 static enum llioc_iter
3476 ll_iocontrol_call(struct inode *inode, struct file *file,
3477 unsigned int cmd, unsigned long arg, int *rcp)
3478 {
3479 enum llioc_iter ret = LLIOC_CONT;
3480 struct llioc_data *data;
3481 int rc = -EINVAL, i;
3482
3483 down_read(&llioc.ioc_sem);
3484 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3485 for (i = 0; i < data->iocd_count; i++) {
3486 if (cmd != data->iocd_cmd[i])
3487 continue;
3488
3489 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3490 break;
3491 }
3492
3493 if (ret == LLIOC_STOP)
3494 break;
3495 }
3496 up_read(&llioc.ioc_sem);
3497
3498 if (rcp)
3499 *rcp = rc;
3500 return ret;
3501 }
3502
/**
 * Push a layout (object) configuration down to the CLIO object of \a inode.
 *
 * For OBJECT_CONF_SET the caller holds the layout lock (conf->coc_lock);
 * after the layout is applied the lock is allowed to be matched by others.
 *
 * \retval 0 on success, negative errno otherwise (e.g. -EBUSY while the
 *	   old layout is still in use)
 */
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_env_nest nest;
	struct lu_env *env;
	int result;

	/* no cl_object attached: nothing to configure */
	if (!lli->lli_clob)
		return 0;

	/* nested env since we may already be inside a cl context */
	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		return PTR_ERR(env);

	result = cl_conf_set(env, lli->lli_clob, conf);
	cl_env_nested_put(&nest, env);

	if (conf->coc_opc == OBJECT_CONF_SET) {
		struct ldlm_lock *lock = conf->coc_lock;

		LASSERT(lock);
		LASSERT(ldlm_has_layout(lock));
		if (result == 0) {
			/* it can only be allowed to match after layout is
			 * applied to inode otherwise false layout would be
			 * seen. Applying layout should happen before dropping
			 * the intent lock.
			 */
			ldlm_lock_allow_match(lock);
		}
	}
	return result;
}
3536
3537 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3538 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3539
3540 {
3541 struct ll_sb_info *sbi = ll_i2sbi(inode);
3542 struct ptlrpc_request *req;
3543 struct mdt_body *body;
3544 void *lvbdata;
3545 void *lmm;
3546 int lmmsize;
3547 int rc;
3548
3549 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3550 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3551 lock->l_lvb_data, lock->l_lvb_len);
3552
3553 if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
3554 return 0;
3555
3556 /* if layout lock was granted right away, the layout is returned
3557 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3558 * blocked and then granted via completion ast, we have to fetch
3559 * layout here. Please note that we can't use the LVB buffer in
3560 * completion AST because it doesn't have a large enough buffer
3561 */
3562 rc = ll_get_default_mdsize(sbi, &lmmsize);
3563 if (rc == 0)
3564 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3565 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3566 lmmsize, 0, &req);
3567 if (rc < 0)
3568 return rc;
3569
3570 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3571 if (!body) {
3572 rc = -EPROTO;
3573 goto out;
3574 }
3575
3576 lmmsize = body->mbo_eadatasize;
3577 if (lmmsize == 0) /* empty layout */ {
3578 rc = 0;
3579 goto out;
3580 }
3581
3582 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3583 if (!lmm) {
3584 rc = -EFAULT;
3585 goto out;
3586 }
3587
3588 lvbdata = libcfs_kvzalloc(lmmsize, GFP_NOFS);
3589 if (!lvbdata) {
3590 rc = -ENOMEM;
3591 goto out;
3592 }
3593
3594 memcpy(lvbdata, lmm, lmmsize);
3595 lock_res_and_lock(lock);
3596 if (lock->l_lvb_data)
3597 kvfree(lock->l_lvb_data);
3598
3599 lock->l_lvb_data = lvbdata;
3600 lock->l_lvb_len = lmmsize;
3601 unlock_res_and_lock(lock);
3602
3603 out:
3604 ptlrpc_req_finished(req);
3605 return rc;
3606 }
3607
/**
 * Apply the layout to the inode. Layout lock is held and will be released
 * in this function.
 *
 * \param lockh		handle of the granted layout lock (decref'd here)
 * \param mode		mode the lock was granted in
 * \param inode		inode the layout belongs to
 * \param gen[out]	layout generation after the layout is applied
 * \param reconf	if false, only report whether the LVB is ready
 *
 * \retval 0		layout applied (or already valid), *gen set
 * \retval -ENODATA	LVB not ready and \a reconf is false
 * \retval -EAGAIN	layout was busy; caller should retry from scratch
 * \retval other	negative errno on failure
 */
static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
			      struct inode *inode, __u32 *gen, bool reconf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_lock *lock;
	struct lustre_md md = { NULL };
	struct cl_object_conf conf;
	int rc = 0;
	bool lvb_ready;
	bool wait_layout = false;

	LASSERT(lustre_handle_is_used(lockh));

	lock = ldlm_handle2lock(lockh);
	LASSERT(lock);
	LASSERT(ldlm_has_layout(lock));

	LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d",
		   PFID(&lli->lli_fid), inode, reconf);

	/* in case this is a caching lock and reinstate with new inode */
	md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);

	lock_res_and_lock(lock);
	lvb_ready = ldlm_is_lvb_ready(lock);
	unlock_res_and_lock(lock);
	/* checking lvb_ready is racy but this is okay. The worst case is
	 * that multi processes may configure the file on the same time.
	 */
	if (lvb_ready || !reconf) {
		rc = -ENODATA;
		if (lvb_ready) {
			/* layout_gen must be valid if layout lock is not
			 * cancelled and stripe has already set
			 */
			*gen = ll_layout_version_get(lli);
			rc = 0;
		}
		goto out;
	}

	/* make sure the lock carries the layout in its LVB */
	rc = ll_layout_fetch(inode, lock);
	if (rc < 0)
		goto out;

	/* for layout lock, lmm is returned in lock's lvb.
	 * lvb_data is immutable if the lock is held so it's safe to access it
	 * without res lock. See the description in ldlm_lock_decref_internal()
	 * for the condition to free lvb_data of layout lock
	 */
	if (lock->l_lvb_data) {
		rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
				  lock->l_lvb_data, lock->l_lvb_len);
		if (rc >= 0) {
			*gen = LL_LAYOUT_GEN_EMPTY;
			if (md.lsm)
				*gen = md.lsm->lsm_layout_gen;
			rc = 0;
		} else {
			CERROR("%s: file " DFID " unpackmd error: %d\n",
			       ll_get_fsname(inode->i_sb, NULL, 0),
			       PFID(&lli->lli_fid), rc);
		}
	}
	if (rc < 0)
		goto out;

	/* set layout to file. Unlikely this will fail as old layout was
	 * surely eliminated
	 */
	memset(&conf, 0, sizeof(conf));
	conf.coc_opc = OBJECT_CONF_SET;
	conf.coc_inode = inode;
	conf.coc_lock = lock;
	conf.u.coc_md = &md;
	rc = ll_layout_conf(inode, &conf);

	if (md.lsm)
		obd_free_memmd(sbi->ll_dt_exp, &md.lsm);

	/* refresh layout failed, need to wait */
	wait_layout = rc == -EBUSY;

out:
	/* release both the 2lock reference and the caller's lock hold */
	LDLM_LOCK_PUT(lock);
	ldlm_lock_decref(lockh, mode);

	/* wait for IO to complete if it's still being used. */
	if (wait_layout) {
		CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       PFID(&lli->lli_fid), inode);

		memset(&conf, 0, sizeof(conf));
		conf.coc_opc = OBJECT_CONF_WAIT;
		conf.coc_inode = inode;
		rc = ll_layout_conf(inode, &conf);
		if (rc == 0)
			rc = -EAGAIN;

		CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       PFID(&lli->lli_fid), rc);
	}
	return rc;
}
3719
/**
 * This function checks if there exists a LAYOUT lock on the client side,
 * or enqueues it if it doesn't have one in cache.
 *
 * This function will not hold layout lock so it may be revoked any time after
 * this function returns. Any operations depend on layout should be redone
 * in that case.
 *
 * This function should be called before lov_io_init() to get an uptodate
 * layout version, the caller should save the version number and after IO
 * is finished, this function should be called again to verify that layout
 * is not changed during IO time.
 *
 * \param inode		inode whose layout version is wanted
 * \param gen[out]	current layout generation
 *
 * \retval 0 on success, negative errno otherwise
 */
int ll_layout_refresh(struct inode *inode, __u32 *gen)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct lookup_intent it;
	struct lustre_handle lockh;
	enum ldlm_mode mode;
	struct ldlm_enqueue_info einfo = {
		.ei_type = LDLM_IBITS,
		.ei_mode = LCK_CR,
		.ei_cb_bl = &ll_md_blocking_ast,
		.ei_cb_cp = &ldlm_completion_ast,
	};
	int rc;

	/* fast path: layout lock feature off, or generation already known */
	*gen = ll_layout_version_get(lli);
	if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
		return 0;

	/* sanity checks */
	LASSERT(fid_is_sane(ll_inode2fid(inode)));
	LASSERT(S_ISREG(inode->i_mode));

	/* take layout lock mutex to enqueue layout lock exclusively. */
	mutex_lock(&lli->lli_layout_mutex);

again:
	/* mostly layout lock is caching on the local side, so try to match
	 * it before grabbing layout lock mutex.
	 */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		/* -EAGAIN means the layout was busy: retry the match */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;

		mutex_unlock(&lli->lli_layout_mutex);
		return rc;
	}

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
				     0, 0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_layout_mutex);
		return PTR_ERR(op_data);
	}

	/* have to enqueue one */
	memset(&it, 0, sizeof(it));
	it.it_op = IT_LAYOUT;
	lockh.cookie = 0ULL;

	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
			  ll_get_fsname(inode->i_sb, NULL, 0),
			  PFID(&lli->lli_fid), inode);

	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
	ptlrpc_req_finished(it.it_request);
	it.it_request = NULL;

	ll_finish_md_op_data(op_data);

	/* take over the lock reference from the intent before dropping it */
	mode = it.it_lock_mode;
	it.it_lock_mode = 0;
	ll_intent_drop_lock(&it);

	if (rc == 0) {
		/* set lock data in case this is a new lock */
		ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;
	}
	mutex_unlock(&lli->lli_layout_mutex);

	return rc;
}
3812
3813 /**
3814 * This function send a restore request to the MDT
3815 */
3816 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3817 {
3818 struct hsm_user_request *hur;
3819 int len, rc;
3820
3821 len = sizeof(struct hsm_user_request) +
3822 sizeof(struct hsm_user_item);
3823 hur = kzalloc(len, GFP_NOFS);
3824 if (!hur)
3825 return -ENOMEM;
3826
3827 hur->hur_request.hr_action = HUA_RESTORE;
3828 hur->hur_request.hr_archive_id = 0;
3829 hur->hur_request.hr_flags = 0;
3830 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3831 sizeof(hur->hur_user_item[0].hui_fid));
3832 hur->hur_user_item[0].hui_extent.offset = offset;
3833 hur->hur_user_item[0].hui_extent.length = length;
3834 hur->hur_request.hr_itemcount = 1;
3835 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
3836 len, hur, NULL);
3837 kfree(hur);
3838 return rc;
3839 }