Merge remote-tracking branch 'staging/staging-next'
[deliverable/linux.git] / drivers / staging / lustre / lustre / llite / file.c
CommitLineData
d7e09d03
PT
1/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
6a5b99a4 18 * http://www.gnu.org/licenses/gpl-2.0.html
d7e09d03 19 *
d7e09d03
PT
20 * GPL HEADER END
21 */
22/*
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
1dc563a6 26 * Copyright (c) 2011, 2015, Intel Corporation.
d7e09d03
PT
27 */
28/*
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
31 *
32 * lustre/llite/file.c
33 *
34 * Author: Peter Braam <braam@clusterfs.com>
35 * Author: Phil Schwan <phil@clusterfs.com>
36 * Author: Andreas Dilger <adilger@clusterfs.com>
37 */
38
39#define DEBUG_SUBSYSTEM S_LLITE
67a235f5
GKH
40#include "../include/lustre_dlm.h"
41#include "../include/lustre_lite.h"
d7e09d03
PT
42#include <linux/pagemap.h>
43#include <linux/file.h>
c948390f 44#include <linux/sched.h>
bb41292b 45#include <linux/mount.h>
d7e09d03 46#include "llite_internal.h"
67a235f5 47#include "../include/lustre/ll_fiemap.h"
8877d3bf 48#include "../include/lustre/lustre_ioctl.h"
d7e09d03 49
67a235f5 50#include "../include/cl_object.h"
d7e09d03 51
2d95f10e
JH
52static int
53ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
54
55static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
56 bool *lease_broken);
57
58static enum llioc_iter
59ll_iocontrol_call(struct inode *inode, struct file *file,
60 unsigned int cmd, unsigned long arg, int *rcp);
61
62static struct ll_file_data *ll_file_data_get(void)
d7e09d03
PT
63{
64 struct ll_file_data *fd;
65
21068c46 66 fd = kmem_cache_zalloc(ll_file_data_slab, GFP_NOFS);
6e16818b 67 if (!fd)
73863d83 68 return NULL;
d7e09d03
PT
69 fd->fd_write_failed = false;
70 return fd;
71}
72
73static void ll_file_data_put(struct ll_file_data *fd)
74{
6e16818b 75 if (fd)
50d30362 76 kmem_cache_free(ll_file_data_slab, fd);
d7e09d03
PT
77}
78
79void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
80 struct lustre_handle *fh)
81{
82 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
83 op_data->op_attr.ia_mode = inode->i_mode;
84 op_data->op_attr.ia_atime = inode->i_atime;
85 op_data->op_attr.ia_mtime = inode->i_mtime;
86 op_data->op_attr.ia_ctime = inode->i_ctime;
87 op_data->op_attr.ia_size = i_size_read(inode);
88 op_data->op_attr_blocks = inode->i_blocks;
bb41292b 89 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
d7e09d03
PT
90 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
91 if (fh)
92 op_data->op_handle = *fh;
d7e09d03 93
1f6eaf83 94 if (ll_i2info(inode)->lli_flags & LLIF_DATA_MODIFIED)
d7e09d03
PT
95 op_data->op_bias |= MDS_DATA_MODIFIED;
96}
97
98/**
99 * Closes the IO epoch and packs all the attributes into @op_data for
100 * the CLOSE rpc.
101 */
102static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
103 struct obd_client_handle *och)
104{
f57d9a72
EL
105 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
106 ATTR_MTIME | ATTR_MTIME_SET |
107 ATTR_CTIME | ATTR_CTIME_SET;
d7e09d03
PT
108
109 if (!(och->och_flags & FMODE_WRITE))
110 goto out;
111
112 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
113 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
114 else
115 ll_ioepoch_close(inode, op_data, &och, 0);
116
117out:
118 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
119 ll_prep_md_op_data(op_data, inode, NULL, NULL,
120 0, 0, LUSTRE_OPC_ANY, NULL);
d7e09d03
PT
121}
122
123static int ll_close_inode_openhandle(struct obd_export *md_exp,
124 struct inode *inode,
48d23e61
JX
125 struct obd_client_handle *och,
126 const __u64 *data_version)
d7e09d03
PT
127{
128 struct obd_export *exp = ll_i2mdexp(inode);
129 struct md_op_data *op_data;
130 struct ptlrpc_request *req = NULL;
131 struct obd_device *obd = class_exp2obd(exp);
132 int epoch_close = 1;
133 int rc;
d7e09d03 134
6e16818b 135 if (!obd) {
d7e09d03
PT
136 /*
137 * XXX: in case of LMV, is this correct to access
138 * ->exp_handle?
139 */
55f5a824 140 CERROR("Invalid MDC connection handle %#llx\n",
d7e09d03 141 ll_i2mdexp(inode)->exp_handle.h_cookie);
34e1f2bb
JL
142 rc = 0;
143 goto out;
d7e09d03
PT
144 }
145
496a51bd
JL
146 op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
147 if (!op_data) {
34e1f2bb
JL
148 /* XXX We leak openhandle and request here. */
149 rc = -ENOMEM;
150 goto out;
151 }
d7e09d03
PT
152
153 ll_prepare_close(inode, op_data, och);
6e16818b 154 if (data_version) {
48d23e61
JX
155 /* Pass in data_version implies release. */
156 op_data->op_bias |= MDS_HSM_RELEASE;
157 op_data->op_data_version = *data_version;
158 op_data->op_lease_handle = och->och_lease_handle;
159 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
160 }
b6ee3824 161 epoch_close = op_data->op_flags & MF_EPOCH_CLOSE;
d7e09d03
PT
162 rc = md_close(md_exp, op_data, och->och_mod, &req);
163 if (rc == -EAGAIN) {
164 /* This close must have the epoch closed. */
165 LASSERT(epoch_close);
166 /* MDS has instructed us to obtain Size-on-MDS attribute from
c0894c6c
OD
167 * OSTs and send setattr to back to MDS.
168 */
d7e09d03
PT
169 rc = ll_som_update(inode, op_data);
170 if (rc) {
97a075cd
JN
171 CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n",
172 ll_i2mdexp(inode)->exp_obd->obd_name,
173 PFID(ll_inode2fid(inode)), rc);
d7e09d03
PT
174 rc = 0;
175 }
176 } else if (rc) {
97a075cd
JN
177 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
178 ll_i2mdexp(inode)->exp_obd->obd_name,
179 PFID(ll_inode2fid(inode)), rc);
d7e09d03
PT
180 }
181
182 /* DATA_MODIFIED flag was successfully sent on close, cancel data
c0894c6c
OD
183 * modification flag.
184 */
d7e09d03
PT
185 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
186 struct ll_inode_info *lli = ll_i2info(inode);
187
188 spin_lock(&lli->lli_lock);
189 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
190 spin_unlock(&lli->lli_lock);
191 }
192
d7e09d03
PT
193 if (rc == 0) {
194 rc = ll_objects_destroy(req, inode);
195 if (rc)
196 CERROR("inode %lu ll_objects destroy: rc = %d\n",
197 inode->i_ino, rc);
198 }
48d23e61
JX
199 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
200 struct mdt_body *body;
cea812cd 201
48d23e61 202 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2e1b5b8b 203 if (!(body->mbo_valid & OBD_MD_FLRELEASED))
48d23e61
JX
204 rc = -EBUSY;
205 }
206
207 ll_finish_md_op_data(op_data);
d7e09d03 208
d7e09d03 209out:
d7e09d03
PT
210 if (exp_connect_som(exp) && !epoch_close &&
211 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
212 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
213 } else {
214 md_clear_open_replay_data(md_exp, och);
215 /* Free @och if it is not waiting for DONE_WRITING. */
216 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
97903a26 217 kfree(och);
d7e09d03
PT
218 }
219 if (req) /* This is close request */
220 ptlrpc_req_finished(req);
221 return rc;
222}
223
45b2a010 224int ll_md_real_close(struct inode *inode, fmode_t fmode)
d7e09d03
PT
225{
226 struct ll_inode_info *lli = ll_i2info(inode);
227 struct obd_client_handle **och_p;
228 struct obd_client_handle *och;
229 __u64 *och_usecount;
230 int rc = 0;
d7e09d03 231
45b2a010 232 if (fmode & FMODE_WRITE) {
d7e09d03
PT
233 och_p = &lli->lli_mds_write_och;
234 och_usecount = &lli->lli_open_fd_write_count;
45b2a010 235 } else if (fmode & FMODE_EXEC) {
d7e09d03
PT
236 och_p = &lli->lli_mds_exec_och;
237 och_usecount = &lli->lli_open_fd_exec_count;
238 } else {
45b2a010 239 LASSERT(fmode & FMODE_READ);
d7e09d03
PT
240 och_p = &lli->lli_mds_read_och;
241 och_usecount = &lli->lli_open_fd_read_count;
242 }
243
244 mutex_lock(&lli->lli_och_mutex);
45b2a010
JH
245 if (*och_usecount > 0) {
246 /* There are still users of this handle, so skip
c0894c6c
OD
247 * freeing it.
248 */
d7e09d03 249 mutex_unlock(&lli->lli_och_mutex);
0a3bdb00 250 return 0;
d7e09d03 251 }
45b2a010 252
57303e76 253 och = *och_p;
d7e09d03
PT
254 *och_p = NULL;
255 mutex_unlock(&lli->lli_och_mutex);
256
6e16818b 257 if (och) {
45b2a010 258 /* There might be a race and this handle may already
c0894c6c
OD
259 * be closed.
260 */
d7e09d03 261 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
48d23e61 262 inode, och, NULL);
d7e09d03
PT
263 }
264
0a3bdb00 265 return rc;
d7e09d03
PT
266}
267
2d95f10e
JH
268static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
269 struct file *file)
d7e09d03
PT
270{
271 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
272 struct ll_inode_info *lli = ll_i2info(inode);
74d01958
AV
273 int lockmode;
274 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
275 struct lustre_handle lockh;
8369cfff 276 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN} };
d7e09d03 277 int rc = 0;
d7e09d03
PT
278
279 /* clear group lock, if present */
280 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
98eae5e7 281 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
d7e09d03 282
6e16818b 283 if (fd->fd_lease_och) {
d3a8a4e2
JX
284 bool lease_broken;
285
286 /* Usually the lease is not released when the
c0894c6c
OD
287 * application crashed, we need to release here.
288 */
d3a8a4e2 289 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
e15ba45d
OD
290 CDEBUG(rc ? D_ERROR : D_INODE,
291 "Clean up lease " DFID " %d/%d\n",
292 PFID(&lli->lli_fid), rc, lease_broken);
d3a8a4e2
JX
293
294 fd->fd_lease_och = NULL;
295 }
296
6e16818b 297 if (fd->fd_och) {
48d23e61 298 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
d3a8a4e2 299 fd->fd_och = NULL;
34e1f2bb 300 goto out;
d3a8a4e2
JX
301 }
302
d7e09d03 303 /* Let's see if we have good enough OPEN lock on the file and if
c0894c6c
OD
304 * we can skip talking to MDS
305 */
d7e09d03 306
74d01958
AV
307 mutex_lock(&lli->lli_och_mutex);
308 if (fd->fd_omode & FMODE_WRITE) {
309 lockmode = LCK_CW;
310 LASSERT(lli->lli_open_fd_write_count);
311 lli->lli_open_fd_write_count--;
312 } else if (fd->fd_omode & FMODE_EXEC) {
313 lockmode = LCK_PR;
314 LASSERT(lli->lli_open_fd_exec_count);
315 lli->lli_open_fd_exec_count--;
d7e09d03 316 } else {
74d01958
AV
317 lockmode = LCK_CR;
318 LASSERT(lli->lli_open_fd_read_count);
319 lli->lli_open_fd_read_count--;
d7e09d03 320 }
74d01958
AV
321 mutex_unlock(&lli->lli_och_mutex);
322
323 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
324 LDLM_IBITS, &policy, lockmode, &lockh))
325 rc = ll_md_real_close(inode, fd->fd_omode);
d7e09d03 326
d3a8a4e2 327out:
d7e09d03
PT
328 LUSTRE_FPRIVATE(file) = NULL;
329 ll_file_data_put(fd);
d7e09d03 330
0a3bdb00 331 return rc;
d7e09d03
PT
332}
333
334/* While this returns an error code, fput() the caller does not, so we need
335 * to make every effort to clean up all of our state here. Also, applications
336 * rarely check close errors and even if an error is returned they will not
337 * re-try the close call.
338 */
339int ll_file_release(struct inode *inode, struct file *file)
340{
341 struct ll_file_data *fd;
342 struct ll_sb_info *sbi = ll_i2sbi(inode);
343 struct ll_inode_info *lli = ll_i2info(inode);
344 int rc;
d7e09d03 345
97a075cd
JN
346 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
347 PFID(ll_inode2fid(inode)), inode);
d7e09d03 348
f76c23da 349 if (!is_root_inode(inode))
d7e09d03
PT
350 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
351 fd = LUSTRE_FPRIVATE(file);
6e16818b 352 LASSERT(fd);
d7e09d03 353
c0894c6c 354 /* The last ref on @file, maybe not be the owner pid of statahead.
d7e09d03 355 * Different processes can open the same dir, "ll_opendir_key" means:
c0894c6c
OD
356 * it is me that should stop the statahead thread.
357 */
d7e09d03
PT
358 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
359 lli->lli_opendir_pid != 0)
360 ll_stop_statahead(inode, lli->lli_opendir_key);
361
f76c23da 362 if (is_root_inode(inode)) {
d7e09d03
PT
363 LUSTRE_FPRIVATE(file) = NULL;
364 ll_file_data_put(fd);
0a3bdb00 365 return 0;
d7e09d03
PT
366 }
367
368 if (!S_ISDIR(inode->i_mode)) {
79496845 369 if (lli->lli_clob)
370 lov_read_and_clear_async_rc(lli->lli_clob);
d7e09d03
PT
371 lli->lli_async_rc = 0;
372 }
373
374 rc = ll_md_close(sbi->ll_md_exp, inode, file);
375
376 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
377 libcfs_debug_dumplog();
378
0a3bdb00 379 return rc;
d7e09d03
PT
380}
381
c1b66fcc
LS
382static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
383 struct lookup_intent *itp)
d7e09d03 384{
c1b66fcc 385 struct inode *inode = d_inode(de);
48eddfd5 386 struct ll_sb_info *sbi = ll_i2sbi(inode);
c1b66fcc
LS
387 struct dentry *parent = de->d_parent;
388 const char *name = NULL;
d7e09d03 389 struct md_op_data *op_data;
70a251f6 390 struct ptlrpc_request *req = NULL;
c1b66fcc 391 int len = 0, rc;
d7e09d03 392
c1b66fcc
LS
393 LASSERT(parent);
394 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
395
396 /*
397 * if server supports open-by-fid, or file name is invalid, don't pack
398 * name in open request
c0894c6c 399 */
c1b66fcc
LS
400 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
401 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
402 name = de->d_name.name;
403 len = de->d_name.len;
d7e09d03
PT
404 }
405
c1b66fcc
LS
406 op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
407 O_RDWR, LUSTRE_OPC_ANY, NULL);
d7e09d03 408 if (IS_ERR(op_data))
0a3bdb00 409 return PTR_ERR(op_data);
70a251f6
JH
410 op_data->op_data = lmm;
411 op_data->op_data_size = lmmsize;
d7e09d03 412
70a251f6
JH
413 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
414 &ll_md_blocking_ast, 0);
d7e09d03
PT
415 ll_finish_md_op_data(op_data);
416 if (rc == -ESTALE) {
417 /* reason for keep own exit path - don`t flood log
418 * with messages with -ESTALE errors.
419 */
420 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
e15ba45d 421 it_open_error(DISP_OPEN_OPEN, itp))
34e1f2bb 422 goto out;
e22fdcc8 423 ll_release_openhandle(inode, itp);
34e1f2bb 424 goto out;
d7e09d03
PT
425 }
426
34e1f2bb
JL
427 if (it_disposition(itp, DISP_LOOKUP_NEG)) {
428 rc = -ENOENT;
429 goto out;
430 }
d7e09d03
PT
431
432 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
433 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
434 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
34e1f2bb 435 goto out;
d7e09d03
PT
436 }
437
48eddfd5 438 rc = ll_prep_inode(&inode, req, NULL, itp);
e476f2e5 439 if (!rc && itp->it_lock_mode)
48eddfd5 440 ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);
d7e09d03
PT
441
442out:
f236f69b 443 ptlrpc_req_finished(req);
d7e09d03
PT
444 ll_intent_drop_lock(itp);
445
0a3bdb00 446 return rc;
d7e09d03
PT
447}
448
449/**
450 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
451 * not believe attributes if a few ioepoch holders exist. Attributes for
452 * previous ioepoch if new one is opened are also skipped by MDS.
453 */
454void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
455{
456 if (ioepoch && lli->lli_ioepoch != ioepoch) {
457 lli->lli_ioepoch = ioepoch;
b0f5aad5 458 CDEBUG(D_INODE, "Epoch %llu opened on "DFID"\n",
d7e09d03
PT
459 ioepoch, PFID(&lli->lli_fid));
460 }
461}
462
ea1db081
JH
463static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
464 struct obd_client_handle *och)
d7e09d03 465{
d7e09d03
PT
466 struct mdt_body *body;
467
8bf86fd9 468 body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
2e1b5b8b
JH
469 och->och_fh = body->mbo_handle;
470 och->och_fid = body->mbo_fid1;
e476f2e5 471 och->och_lease_handle.cookie = it->it_lock_handle;
d7e09d03 472 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
d7e09d03 473 och->och_flags = it->it_flags;
d7e09d03 474
63d42578 475 return md_set_open_replay_data(md_exp, och, it);
d7e09d03
PT
476}
477
2d95f10e
JH
478static int ll_local_open(struct file *file, struct lookup_intent *it,
479 struct ll_file_data *fd, struct obd_client_handle *och)
d7e09d03 480{
2a8a3597 481 struct inode *inode = file_inode(file);
d7e09d03 482 struct ll_inode_info *lli = ll_i2info(inode);
d7e09d03
PT
483
484 LASSERT(!LUSTRE_FPRIVATE(file));
485
6e16818b 486 LASSERT(fd);
d7e09d03
PT
487
488 if (och) {
d7e09d03
PT
489 struct mdt_body *body;
490 int rc;
491
ea1db081
JH
492 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
493 if (rc != 0)
0a3bdb00 494 return rc;
d7e09d03 495
8bf86fd9
JH
496 body = req_capsule_server_get(&it->it_request->rq_pill,
497 &RMF_MDT_BODY);
2e1b5b8b 498 ll_ioepoch_open(lli, body->mbo_ioepoch);
d7e09d03
PT
499 }
500
501 LUSTRE_FPRIVATE(file) = fd;
502 ll_readahead_init(inode, &fd->fd_ras);
d3a8a4e2 503 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
966c4a8f
JX
504
505 /* ll_cl_context initialize */
506 rwlock_init(&fd->fd_lock);
507 INIT_LIST_HEAD(&fd->fd_lccs);
508
0a3bdb00 509 return 0;
d7e09d03
PT
510}
511
512/* Open a file, and (for the very first open) create objects on the OSTs at
513 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
514 * creation or open until ll_lov_setstripe() ioctl is called.
515 *
516 * If we already have the stripe MD locally then we don't request it in
517 * md_open(), by passing a lmm_size = 0.
518 *
519 * It is up to the application to ensure no other processes open this file
520 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
521 * used. We might be able to avoid races of that sort by getting lli_open_sem
522 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
523 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
524 */
525int ll_file_open(struct inode *inode, struct file *file)
526{
527 struct ll_inode_info *lli = ll_i2info(inode);
528 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
529 .it_flags = file->f_flags };
530 struct obd_client_handle **och_p = NULL;
531 __u64 *och_usecount = NULL;
532 struct ll_file_data *fd;
533 int rc = 0, opendir_set = 0;
d7e09d03 534
97a075cd
JN
535 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
536 PFID(ll_inode2fid(inode)), inode, file->f_flags);
d7e09d03
PT
537
538 it = file->private_data; /* XXX: compat macro */
539 file->private_data = NULL; /* prevent ll_local_open assertion */
540
541 fd = ll_file_data_get();
6e16818b 542 if (!fd) {
34e1f2bb
JL
543 rc = -ENOMEM;
544 goto out_openerr;
545 }
d7e09d03
PT
546
547 fd->fd_file = file;
548 if (S_ISDIR(inode->i_mode)) {
549 spin_lock(&lli->lli_sa_lock);
6e16818b 550 if (!lli->lli_opendir_key && !lli->lli_sai &&
d7e09d03
PT
551 lli->lli_opendir_pid == 0) {
552 lli->lli_opendir_key = fd;
553 lli->lli_opendir_pid = current_pid();
554 opendir_set = 1;
555 }
556 spin_unlock(&lli->lli_sa_lock);
557 }
558
f76c23da 559 if (is_root_inode(inode)) {
d7e09d03 560 LUSTRE_FPRIVATE(file) = fd;
0a3bdb00 561 return 0;
d7e09d03
PT
562 }
563
e476f2e5 564 if (!it || !it->it_disposition) {
d7e09d03
PT
565 /* Convert f_flags into access mode. We cannot use file->f_mode,
566 * because everything but O_ACCMODE mask was stripped from
c0894c6c
OD
567 * there
568 */
d7e09d03
PT
569 if ((oit.it_flags + 1) & O_ACCMODE)
570 oit.it_flags++;
571 if (file->f_flags & O_TRUNC)
572 oit.it_flags |= FMODE_WRITE;
573
574 /* kernel only call f_op->open in dentry_open. filp_open calls
575 * dentry_open after call to open_namei that checks permissions.
576 * Only nfsd_open call dentry_open directly without checking
c0894c6c
OD
577 * permissions and because of that this code below is safe.
578 */
d7e09d03
PT
579 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
580 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
581
582 /* We do not want O_EXCL here, presumably we opened the file
c0894c6c
OD
583 * already? XXX - NFS implications?
584 */
d7e09d03
PT
585 oit.it_flags &= ~O_EXCL;
586
587 /* bug20584, if "it_flags" contains O_CREAT, the file will be
588 * created if necessary, then "IT_CREAT" should be set to keep
c0894c6c
OD
589 * consistent with it
590 */
d7e09d03
PT
591 if (oit.it_flags & O_CREAT)
592 oit.it_op |= IT_CREAT;
593
594 it = &oit;
595 }
596
597restart:
598 /* Let's see if we have file open on MDS already. */
599 if (it->it_flags & FMODE_WRITE) {
600 och_p = &lli->lli_mds_write_och;
601 och_usecount = &lli->lli_open_fd_write_count;
602 } else if (it->it_flags & FMODE_EXEC) {
603 och_p = &lli->lli_mds_exec_och;
604 och_usecount = &lli->lli_open_fd_exec_count;
605 } else {
606 och_p = &lli->lli_mds_read_och;
607 och_usecount = &lli->lli_open_fd_read_count;
608 }
609
610 mutex_lock(&lli->lli_och_mutex);
611 if (*och_p) { /* Open handle is present */
612 if (it_disposition(it, DISP_OPEN_OPEN)) {
613 /* Well, there's extra open request that we do not need,
c0894c6c
OD
614 * let's close it somehow. This will decref request.
615 */
d7e09d03
PT
616 rc = it_open_error(DISP_OPEN_OPEN, it);
617 if (rc) {
618 mutex_unlock(&lli->lli_och_mutex);
34e1f2bb 619 goto out_openerr;
d7e09d03
PT
620 }
621
e22fdcc8 622 ll_release_openhandle(inode, it);
d7e09d03
PT
623 }
624 (*och_usecount)++;
625
626 rc = ll_local_open(file, it, fd, NULL);
627 if (rc) {
628 (*och_usecount)--;
629 mutex_unlock(&lli->lli_och_mutex);
34e1f2bb 630 goto out_openerr;
d7e09d03
PT
631 }
632 } else {
633 LASSERT(*och_usecount == 0);
e476f2e5 634 if (!it->it_disposition) {
d7e09d03 635 /* We cannot just request lock handle now, new ELC code
c0894c6c
OD
636 * means that one of other OPEN locks for this file
637 * could be cancelled, and since blocking ast handler
638 * would attempt to grab och_mutex as well, that would
639 * result in a deadlock
640 */
d7e09d03 641 mutex_unlock(&lli->lli_och_mutex);
c1b66fcc
LS
642 /*
643 * Normally called under two situations:
644 * 1. NFS export.
645 * 2. revalidate with IT_OPEN (revalidate doesn't
646 * execute this intent any more).
647 *
648 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
649 *
650 * Always specify MDS_OPEN_BY_FID because we don't want
651 * to get file with different fid.
652 */
653 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
48eddfd5 654 rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
d7e09d03 655 if (rc)
34e1f2bb 656 goto out_openerr;
d7e09d03
PT
657
658 goto restart;
659 }
496a51bd 660 *och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS);
34e1f2bb
JL
661 if (!*och_p) {
662 rc = -ENOMEM;
663 goto out_och_free;
664 }
d7e09d03
PT
665
666 (*och_usecount)++;
667
668 /* md_intent_lock() didn't get a request ref if there was an
669 * open error, so don't do cleanup on the request here
c0894c6c
OD
670 * (bug 3430)
671 */
d7e09d03 672 /* XXX (green): Should not we bail out on any error here, not
c0894c6c
OD
673 * just open error?
674 */
d7e09d03
PT
675 rc = it_open_error(DISP_OPEN_OPEN, it);
676 if (rc)
34e1f2bb 677 goto out_och_free;
d7e09d03 678
5787be94
AD
679 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
680 "inode %p: disposition %x, status %d\n", inode,
e476f2e5 681 it_disposition(it, ~0), it->it_status);
d7e09d03
PT
682
683 rc = ll_local_open(file, it, fd, *och_p);
684 if (rc)
34e1f2bb 685 goto out_och_free;
d7e09d03
PT
686 }
687 mutex_unlock(&lli->lli_och_mutex);
688 fd = NULL;
689
690 /* Must do this outside lli_och_mutex lock to prevent deadlock where
c0894c6c
OD
691 * different kind of OPEN lock for this same inode gets cancelled
692 * by ldlm_cancel_lru
693 */
d7e09d03 694 if (!S_ISREG(inode->i_mode))
34e1f2bb 695 goto out_och_free;
d7e09d03 696
38585ccc
AD
697 if (!lli->lli_has_smd &&
698 (cl_is_lov_delay_create(file->f_flags) ||
699 (file->f_mode & FMODE_WRITE) == 0)) {
700 CDEBUG(D_INODE, "object creation was delayed\n");
34e1f2bb 701 goto out_och_free;
d7e09d03 702 }
38585ccc 703 cl_lov_delay_create_clear(&file->f_flags);
34e1f2bb 704 goto out_och_free;
d7e09d03
PT
705
706out_och_free:
707 if (rc) {
708 if (och_p && *och_p) {
97903a26 709 kfree(*och_p);
c0a2472f 710 *och_p = NULL;
d7e09d03
PT
711 (*och_usecount)--;
712 }
713 mutex_unlock(&lli->lli_och_mutex);
714
715out_openerr:
716 if (opendir_set != 0)
717 ll_stop_statahead(inode, lli->lli_opendir_key);
a5cb8880 718 ll_file_data_put(fd);
d7e09d03
PT
719 } else {
720 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
721 }
722
723 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
8bf86fd9 724 ptlrpc_req_finished(it->it_request);
d7e09d03
PT
725 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
726 }
727
728 return rc;
729}
730
d3a8a4e2 731static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
e15ba45d
OD
732 struct ldlm_lock_desc *desc,
733 void *data, int flag)
d3a8a4e2
JX
734{
735 int rc;
736 struct lustre_handle lockh;
737
738 switch (flag) {
739 case LDLM_CB_BLOCKING:
740 ldlm_lock2handle(lock, &lockh);
741 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
742 if (rc < 0) {
743 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
744 return rc;
745 }
746 break;
747 case LDLM_CB_CANCELING:
748 /* do nothing */
749 break;
750 }
751 return 0;
752}
753
754/**
755 * Acquire a lease and open the file.
756 */
2d95f10e
JH
757static struct obd_client_handle *
758ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
759 __u64 open_flags)
d3a8a4e2
JX
760{
761 struct lookup_intent it = { .it_op = IT_OPEN };
762 struct ll_sb_info *sbi = ll_i2sbi(inode);
763 struct md_op_data *op_data;
70a251f6 764 struct ptlrpc_request *req = NULL;
d3a8a4e2
JX
765 struct lustre_handle old_handle = { 0 };
766 struct obd_client_handle *och = NULL;
767 int rc;
768 int rc2;
769
770 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
771 return ERR_PTR(-EINVAL);
772
6e16818b 773 if (file) {
d3a8a4e2
JX
774 struct ll_inode_info *lli = ll_i2info(inode);
775 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
776 struct obd_client_handle **och_p;
777 __u64 *och_usecount;
778
779 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
780 return ERR_PTR(-EPERM);
781
782 /* Get the openhandle of the file */
783 rc = -EBUSY;
784 mutex_lock(&lli->lli_och_mutex);
6e16818b 785 if (fd->fd_lease_och) {
d3a8a4e2
JX
786 mutex_unlock(&lli->lli_och_mutex);
787 return ERR_PTR(rc);
788 }
789
6e16818b 790 if (!fd->fd_och) {
d3a8a4e2 791 if (file->f_mode & FMODE_WRITE) {
6e16818b 792 LASSERT(lli->lli_mds_write_och);
d3a8a4e2
JX
793 och_p = &lli->lli_mds_write_och;
794 och_usecount = &lli->lli_open_fd_write_count;
795 } else {
6e16818b 796 LASSERT(lli->lli_mds_read_och);
d3a8a4e2
JX
797 och_p = &lli->lli_mds_read_och;
798 och_usecount = &lli->lli_open_fd_read_count;
799 }
800 if (*och_usecount == 1) {
801 fd->fd_och = *och_p;
802 *och_p = NULL;
803 *och_usecount = 0;
804 rc = 0;
805 }
806 }
807 mutex_unlock(&lli->lli_och_mutex);
808 if (rc < 0) /* more than 1 opener */
809 return ERR_PTR(rc);
810
6e16818b 811 LASSERT(fd->fd_och);
d3a8a4e2
JX
812 old_handle = fd->fd_och->och_fh;
813 }
814
496a51bd
JL
815 och = kzalloc(sizeof(*och), GFP_NOFS);
816 if (!och)
d3a8a4e2
JX
817 return ERR_PTR(-ENOMEM);
818
819 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
e15ba45d 820 LUSTRE_OPC_ANY, NULL);
34e1f2bb
JL
821 if (IS_ERR(op_data)) {
822 rc = PTR_ERR(op_data);
823 goto out;
824 }
d3a8a4e2
JX
825
826 /* To tell the MDT this openhandle is from the same owner */
827 op_data->op_handle = old_handle;
828
48d23e61
JX
829 it.it_flags = fmode | open_flags;
830 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
70a251f6
JH
831 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
832 &ll_md_blocking_lease_ast,
d3a8a4e2
JX
833 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
834 * it can be cancelled which may mislead applications that the lease is
835 * broken;
836 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
837 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
c0894c6c
OD
838 * doesn't deal with openhandle, so normal openhandle will be leaked.
839 */
70a251f6 840 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
d3a8a4e2 841 ll_finish_md_op_data(op_data);
f236f69b 842 ptlrpc_req_finished(req);
d3a8a4e2 843 if (rc < 0)
34e1f2bb 844 goto out_release_it;
d3a8a4e2 845
34e1f2bb
JL
846 if (it_disposition(&it, DISP_LOOKUP_NEG)) {
847 rc = -ENOENT;
848 goto out_release_it;
849 }
d3a8a4e2
JX
850
851 rc = it_open_error(DISP_OPEN_OPEN, &it);
852 if (rc)
34e1f2bb 853 goto out_release_it;
d3a8a4e2
JX
854
855 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
856 ll_och_fill(sbi->ll_md_exp, &it, och);
857
34e1f2bb
JL
858 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ {
859 rc = -EOPNOTSUPP;
860 goto out_close;
861 }
d3a8a4e2
JX
862
863 /* already get lease, handle lease lock */
864 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
e476f2e5
JH
865 if (it.it_lock_mode == 0 ||
866 it.it_lock_bits != MDS_INODELOCK_OPEN) {
d3a8a4e2
JX
867 /* open lock must return for lease */
868 CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
e476f2e5
JH
869 PFID(ll_inode2fid(inode)), it.it_lock_mode,
870 it.it_lock_bits);
34e1f2bb
JL
871 rc = -EPROTO;
872 goto out_close;
d3a8a4e2
JX
873 }
874
875 ll_intent_release(&it);
876 return och;
877
878out_close:
e55a68b6 879 /* Cancel open lock */
e476f2e5 880 if (it.it_lock_mode != 0) {
d3a8a4e2 881 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
e476f2e5
JH
882 it.it_lock_mode);
883 it.it_lock_mode = 0;
e55a68b6 884 och->och_lease_handle.cookie = 0ULL;
d3a8a4e2 885 }
e55a68b6
JX
886 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
887 if (rc2 < 0)
888 CERROR("%s: error closing file "DFID": %d\n",
889 ll_get_fsname(inode->i_sb, NULL, 0),
890 PFID(&ll_i2info(inode)->lli_fid), rc2);
891 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
d3a8a4e2
JX
892out_release_it:
893 ll_intent_release(&it);
894out:
97903a26 895 kfree(och);
d3a8a4e2
JX
896 return ERR_PTR(rc);
897}
d3a8a4e2
JX
898
899/**
900 * Release lease and close the file.
901 * It will check if the lease has ever broken.
902 */
2d95f10e
JH
903static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
904 bool *lease_broken)
d3a8a4e2
JX
905{
906 struct ldlm_lock *lock;
907 bool cancelled = true;
908 int rc;
909
910 lock = ldlm_handle2lock(&och->och_lease_handle);
6e16818b 911 if (lock) {
d3a8a4e2
JX
912 lock_res_and_lock(lock);
913 cancelled = ldlm_is_cancel(lock);
914 unlock_res_and_lock(lock);
ead02808 915 LDLM_LOCK_PUT(lock);
d3a8a4e2
JX
916 }
917
e15ba45d
OD
918 CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
919 PFID(&ll_i2info(inode)->lli_fid), cancelled);
d3a8a4e2
JX
920
921 if (!cancelled)
922 ldlm_cli_cancel(&och->och_lease_handle, 0);
6e16818b 923 if (lease_broken)
d3a8a4e2
JX
924 *lease_broken = cancelled;
925
48d23e61
JX
926 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
927 NULL);
d3a8a4e2
JX
928 return rc;
929}
d3a8a4e2 930
d7e09d03
PT
931/* Fills the obdo with the attributes for the lsm */
932static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
e1798006 933 struct obdo *obdo, __u64 ioepoch, int dv_flags)
d7e09d03
PT
934{
935 struct ptlrpc_request_set *set;
45efd655 936 struct obd_info oinfo = { };
d7e09d03
PT
937 int rc;
938
6e16818b 939 LASSERT(lsm);
d7e09d03
PT
940
941 oinfo.oi_md = lsm;
942 oinfo.oi_oa = obdo;
943 oinfo.oi_oa->o_oi = lsm->lsm_oi;
944 oinfo.oi_oa->o_mode = S_IFREG;
945 oinfo.oi_oa->o_ioepoch = ioepoch;
946 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
947 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
948 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
949 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
950 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
951 OBD_MD_FLDATAVERSION;
e1798006 952 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
d7e09d03
PT
953 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
954 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
e1798006
JX
955 if (dv_flags & LL_DV_WR_FLUSH)
956 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
d7e09d03
PT
957 }
958
959 set = ptlrpc_prep_set();
6e16818b 960 if (!set) {
19b2056f 961 CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);
d7e09d03
PT
962 rc = -ENOMEM;
963 } else {
964 rc = obd_getattr_async(exp, &oinfo, set);
965 if (rc == 0)
966 rc = ptlrpc_set_wait(set);
967 ptlrpc_set_destroy(set);
968 }
e1798006 969 if (rc == 0) {
d7e09d03
PT
970 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
971 OBD_MD_FLATIME | OBD_MD_FLMTIME |
972 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
e1798006
JX
973 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
974 if (dv_flags & LL_DV_WR_FLUSH &&
975 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
976 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
977 return -ENOTSUPP;
978 }
0a3bdb00 979 return rc;
d7e09d03
PT
980}
981
982/**
983 * Performs the getattr on the inode and updates its fields.
984 * If @sync != 0, perform the getattr under the server-side lock.
985 */
986int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
987 __u64 ioepoch, int sync)
988{
d7e09d03
PT
989 struct lov_stripe_md *lsm;
990 int rc;
d7e09d03
PT
991
992 lsm = ccc_inode_lsm_get(inode);
993 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
e1798006 994 obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
d7e09d03
PT
995 if (rc == 0) {
996 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
997
998 obdo_refresh_inode(inode, obdo, obdo->o_valid);
2d00bd17
JP
999 CDEBUG(D_INODE, "objid " DOSTID " size %llu, blocks %llu, blksize %lu\n",
1000 POSTID(oi), i_size_read(inode),
d7e09d03 1001 (unsigned long long)inode->i_blocks,
16e0631d 1002 1UL << inode->i_blkbits);
d7e09d03
PT
1003 }
1004 ccc_inode_lsm_put(inode, lsm);
0a3bdb00 1005 return rc;
d7e09d03
PT
1006}
1007
d2995737 1008int ll_merge_attr(const struct lu_env *env, struct inode *inode)
d7e09d03
PT
1009{
1010 struct ll_inode_info *lli = ll_i2info(inode);
1011 struct cl_object *obj = lli->lli_clob;
9acc4500 1012 struct cl_attr *attr = vvp_env_thread_attr(env);
d2995737
JH
1013 s64 atime;
1014 s64 mtime;
1015 s64 ctime;
d7e09d03
PT
1016 int rc = 0;
1017
d7e09d03 1018 ll_inode_size_lock(inode);
d2995737 1019
d7e09d03 1020 /* merge timestamps the most recently obtained from mds with
c0894c6c
OD
1021 * timestamps obtained from osts
1022 */
d2995737
JH
1023 LTIME_S(inode->i_atime) = lli->lli_atime;
1024 LTIME_S(inode->i_mtime) = lli->lli_mtime;
1025 LTIME_S(inode->i_ctime) = lli->lli_ctime;
376ef86b 1026
d2995737
JH
1027 mtime = LTIME_S(inode->i_mtime);
1028 atime = LTIME_S(inode->i_atime);
1029 ctime = LTIME_S(inode->i_ctime);
d7e09d03
PT
1030
1031 cl_object_attr_lock(obj);
1032 rc = cl_object_attr_get(env, obj, attr);
1033 cl_object_attr_unlock(obj);
1034
d2995737
JH
1035 if (rc != 0)
1036 goto out_size_unlock;
d7e09d03 1037
d2995737
JH
1038 if (atime < attr->cat_atime)
1039 atime = attr->cat_atime;
d7e09d03 1040
d2995737
JH
1041 if (ctime < attr->cat_ctime)
1042 ctime = attr->cat_ctime;
d7e09d03 1043
d2995737
JH
1044 if (mtime < attr->cat_mtime)
1045 mtime = attr->cat_mtime;
1046
1047 CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
1048 PFID(&lli->lli_fid), attr->cat_size);
1049
1929c433 1050 i_size_write(inode, attr->cat_size);
d2995737
JH
1051
1052 inode->i_blocks = attr->cat_blocks;
1053
1054 LTIME_S(inode->i_mtime) = mtime;
1055 LTIME_S(inode->i_atime) = atime;
1056 LTIME_S(inode->i_ctime) = ctime;
1057
1058out_size_unlock:
d7e09d03
PT
1059 ll_inode_size_unlock(inode);
1060
0a3bdb00 1061 return rc;
d7e09d03
PT
1062}
1063
1064int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1065 lstat_t *st)
1066{
1067 struct obdo obdo = { 0 };
1068 int rc;
1069
ef2e0f55 1070 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, &obdo, 0, 0);
d7e09d03
PT
1071 if (rc == 0) {
1072 st->st_size = obdo.o_size;
1073 st->st_blocks = obdo.o_blocks;
1074 st->st_mtime = obdo.o_mtime;
1075 st->st_atime = obdo.o_atime;
1076 st->st_ctime = obdo.o_ctime;
1077 }
1078 return rc;
1079}
1080
ec9bca9c
JH
1081static bool file_is_noatime(const struct file *file)
1082{
1083 const struct vfsmount *mnt = file->f_path.mnt;
2a8a3597 1084 const struct inode *inode = file_inode(file);
ec9bca9c
JH
1085
1086 /* Adapted from file_accessed() and touch_atime().*/
1087 if (file->f_flags & O_NOATIME)
1088 return true;
1089
1090 if (inode->i_flags & S_NOATIME)
1091 return true;
1092
1093 if (IS_NOATIME(inode))
1094 return true;
1095
1096 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1097 return true;
1098
1099 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1100 return true;
1101
1102 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1103 return true;
1104
1105 return false;
1106}
1107
d7e09d03
PT
1108void ll_io_init(struct cl_io *io, const struct file *file, int write)
1109{
2a8a3597 1110 struct inode *inode = file_inode(file);
d7e09d03
PT
1111
1112 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1113 if (write) {
1114 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1115 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1116 file->f_flags & O_DIRECT ||
1117 IS_SYNC(inode);
1118 }
1119 io->ci_obj = ll_i2info(inode)->lli_clob;
1120 io->ci_lockreq = CILR_MAYBE;
1121 if (ll_file_nolock(file)) {
1122 io->ci_lockreq = CILR_NEVER;
1123 io->ci_no_srvlock = 1;
1124 } else if (file->f_flags & O_APPEND) {
1125 io->ci_lockreq = CILR_MANDATORY;
1126 }
ec9bca9c
JH
1127
1128 io->ci_noatime = file_is_noatime(file);
d7e09d03
PT
1129}
1130
1131static ssize_t
1132ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1133 struct file *file, enum cl_io_type iot,
1134 loff_t *ppos, size_t count)
1135{
2a8a3597 1136 struct ll_inode_info *lli = ll_i2info(file_inode(file));
d7e09d03
PT
1137 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1138 struct cl_io *io;
1139 ssize_t result;
d7e09d03 1140
47b34458
AV
1141 CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zd\n",
1142 file, iot, *ppos, count);
77605e41 1143
d7e09d03 1144restart:
9acc4500 1145 io = vvp_env_thread_io(env);
d7e09d03
PT
1146 ll_io_init(io, file, iot == CIT_WRITE);
1147
1148 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
e0a8144b 1149 struct vvp_io *vio = vvp_env_io(env);
d7e09d03
PT
1150 int write_mutex_locked = 0;
1151
e0a8144b
JH
1152 vio->vui_fd = LUSTRE_FPRIVATE(file);
1153 vio->vui_io_subtype = args->via_io_subtype;
d7e09d03 1154
e0a8144b 1155 switch (vio->vui_io_subtype) {
d7e09d03 1156 case IO_NORMAL:
e0a8144b
JH
1157 vio->vui_iter = args->u.normal.via_iter;
1158 vio->vui_iocb = args->u.normal.via_iocb;
d7e09d03 1159 if ((iot == CIT_WRITE) &&
e0a8144b 1160 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
d7e09d03 1161 if (mutex_lock_interruptible(&lli->
34e1f2bb
JL
1162 lli_write_mutex)) {
1163 result = -ERESTARTSYS;
1164 goto out;
1165 }
d7e09d03 1166 write_mutex_locked = 1;
d7e09d03 1167 }
77605e41 1168 down_read(&lli->lli_trunc_sem);
d7e09d03 1169 break;
d7e09d03 1170 case IO_SPLICE:
e0a8144b
JH
1171 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1172 vio->u.splice.vui_flags = args->u.splice.via_flags;
d7e09d03
PT
1173 break;
1174 default:
e0a8144b 1175 CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
d7e09d03
PT
1176 LBUG();
1177 }
966c4a8f 1178 ll_cl_add(file, env, io);
d7e09d03 1179 result = cl_io_loop(env, io);
966c4a8f 1180 ll_cl_remove(file, env);
77605e41
JX
1181 if (args->via_io_subtype == IO_NORMAL)
1182 up_read(&lli->lli_trunc_sem);
d7e09d03
PT
1183 if (write_mutex_locked)
1184 mutex_unlock(&lli->lli_write_mutex);
d7e09d03
PT
1185 } else {
1186 /* cl_io_rw_init() handled IO */
1187 result = io->ci_result;
1188 }
1189
1190 if (io->ci_nob > 0) {
1191 result = io->ci_nob;
1192 *ppos = io->u.ci_wr.wr.crw_pos;
1193 }
34e1f2bb 1194 goto out;
d7e09d03
PT
1195out:
1196 cl_io_fini(env, io);
1197 /* If any bit been read/written (result != 0), we just return
c0894c6c
OD
1198 * short read/write instead of restart io.
1199 */
5ea17d6c 1200 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
09561a53 1201 CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zd\n",
d7e09d03 1202 iot == CIT_READ ? "read" : "write",
09561a53 1203 file, *ppos, count);
19b2056f 1204 LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
d7e09d03
PT
1205 goto restart;
1206 }
1207
1208 if (iot == CIT_READ) {
1209 if (result >= 0)
2a8a3597 1210 ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
d7e09d03
PT
1211 LPROC_LL_READ_BYTES, result);
1212 } else if (iot == CIT_WRITE) {
1213 if (result >= 0) {
2a8a3597 1214 ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
d7e09d03
PT
1215 LPROC_LL_WRITE_BYTES, result);
1216 fd->fd_write_failed = false;
1217 } else if (result != -ERESTARTSYS) {
1218 fd->fd_write_failed = true;
1219 }
1220 }
77605e41 1221 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
d7e09d03
PT
1222
1223 return result;
1224}
1225
b42b15fd 1226static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
d7e09d03
PT
1227{
1228 struct lu_env *env;
1229 struct vvp_io_args *args;
d7e09d03
PT
1230 ssize_t result;
1231 int refcheck;
d7e09d03 1232
d7e09d03
PT
1233 env = cl_env_get(&refcheck);
1234 if (IS_ERR(env))
0a3bdb00 1235 return PTR_ERR(env);
d7e09d03 1236
9989a58e 1237 args = ll_env_args(env, IO_NORMAL);
b42b15fd 1238 args->u.normal.via_iter = to;
d7e09d03
PT
1239 args->u.normal.via_iocb = iocb;
1240
1241 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
b42b15fd 1242 &iocb->ki_pos, iov_iter_count(to));
d7e09d03 1243 cl_env_put(env, &refcheck);
0a3bdb00 1244 return result;
d7e09d03
PT
1245}
1246
1247/*
1248 * Write to a file (through the page cache).
1249 */
b42b15fd 1250static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
d7e09d03
PT
1251{
1252 struct lu_env *env;
1253 struct vvp_io_args *args;
d7e09d03
PT
1254 ssize_t result;
1255 int refcheck;
d7e09d03 1256
d7e09d03
PT
1257 env = cl_env_get(&refcheck);
1258 if (IS_ERR(env))
0a3bdb00 1259 return PTR_ERR(env);
d7e09d03 1260
9989a58e 1261 args = ll_env_args(env, IO_NORMAL);
b42b15fd 1262 args->u.normal.via_iter = from;
d7e09d03
PT
1263 args->u.normal.via_iocb = iocb;
1264
1265 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
e15ba45d 1266 &iocb->ki_pos, iov_iter_count(from));
d7e09d03 1267 cl_env_put(env, &refcheck);
0a3bdb00 1268 return result;
d7e09d03
PT
1269}
1270
d7e09d03
PT
1271/*
1272 * Send file content (through pagecache) somewhere with helper
1273 */
1274static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1275 struct pipe_inode_info *pipe, size_t count,
1276 unsigned int flags)
1277{
1278 struct lu_env *env;
1279 struct vvp_io_args *args;
1280 ssize_t result;
1281 int refcheck;
d7e09d03
PT
1282
1283 env = cl_env_get(&refcheck);
1284 if (IS_ERR(env))
0a3bdb00 1285 return PTR_ERR(env);
d7e09d03 1286
9989a58e 1287 args = ll_env_args(env, IO_SPLICE);
d7e09d03
PT
1288 args->u.splice.via_pipe = pipe;
1289 args->u.splice.via_flags = flags;
1290
1291 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1292 cl_env_put(env, &refcheck);
0a3bdb00 1293 return result;
d7e09d03
PT
1294}
1295
21aef7d9 1296static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx)
d7e09d03
PT
1297{
1298 struct obd_export *exp = ll_i2dtexp(inode);
1299 struct obd_trans_info oti = { 0 };
1300 struct obdo *oa = NULL;
1301 int lsm_size;
1302 int rc = 0;
1303 struct lov_stripe_md *lsm = NULL, *lsm2;
d7e09d03 1304
21068c46 1305 oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
6e16818b 1306 if (!oa)
0a3bdb00 1307 return -ENOMEM;
d7e09d03
PT
1308
1309 lsm = ccc_inode_lsm_get(inode);
34e1f2bb
JL
1310 if (!lsm_has_objects(lsm)) {
1311 rc = -ENOENT;
1312 goto out;
1313 }
d7e09d03
PT
1314
1315 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1316 (lsm->lsm_stripe_count));
1317
e958f49b 1318 lsm2 = libcfs_kvzalloc(lsm_size, GFP_NOFS);
6e16818b 1319 if (!lsm2) {
34e1f2bb
JL
1320 rc = -ENOMEM;
1321 goto out;
1322 }
d7e09d03
PT
1323
1324 oa->o_oi = *oi;
1325 oa->o_nlink = ost_idx;
1326 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1327 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1328 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1329 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1330 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1331 memcpy(lsm2, lsm, lsm_size);
1332 ll_inode_size_lock(inode);
1333 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1334 ll_inode_size_unlock(inode);
1335
e958f49b 1336 kvfree(lsm2);
34e1f2bb 1337 goto out;
d7e09d03
PT
1338out:
1339 ccc_inode_lsm_put(inode, lsm);
2ba262fb 1340 kmem_cache_free(obdo_cachep, oa);
d7e09d03
PT
1341 return rc;
1342}
1343
1344static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1345{
1346 struct ll_recreate_obj ucreat;
1347 struct ost_id oi;
d7e09d03 1348
2eb90a75 1349 if (!capable(CFS_CAP_SYS_ADMIN))
0a3bdb00 1350 return -EPERM;
d7e09d03 1351
02f9c12e 1352 if (copy_from_user(&ucreat, (struct ll_recreate_obj __user *)arg,
d7e09d03 1353 sizeof(ucreat)))
0a3bdb00 1354 return -EFAULT;
d7e09d03
PT
1355
1356 ostid_set_seq_mdt0(&oi);
1357 ostid_set_id(&oi, ucreat.lrc_id);
0a3bdb00 1358 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
d7e09d03
PT
1359}
1360
1361static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1362{
1363 struct lu_fid fid;
1364 struct ost_id oi;
21aef7d9 1365 u32 ost_idx;
d7e09d03 1366
2eb90a75 1367 if (!capable(CFS_CAP_SYS_ADMIN))
0a3bdb00 1368 return -EPERM;
d7e09d03 1369
02f9c12e 1370 if (copy_from_user(&fid, (struct lu_fid __user *)arg, sizeof(fid)))
0a3bdb00 1371 return -EFAULT;
d7e09d03
PT
1372
1373 fid_to_ostid(&fid, &oi);
1374 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
0a3bdb00 1375 return ll_lov_recreate(inode, &oi, ost_idx);
d7e09d03
PT
1376}
1377
c139f3ce 1378int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
d467220e
NY
1379 __u64 flags, struct lov_user_md *lum,
1380 int lum_size)
d7e09d03
PT
1381{
1382 struct lov_stripe_md *lsm = NULL;
1383 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1384 int rc = 0;
d7e09d03
PT
1385
1386 lsm = ccc_inode_lsm_get(inode);
6e16818b 1387 if (lsm) {
d7e09d03 1388 ccc_inode_lsm_put(inode, lsm);
97a075cd
JN
1389 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1390 PFID(ll_inode2fid(inode)));
34e1f2bb
JL
1391 rc = -EEXIST;
1392 goto out;
d7e09d03
PT
1393 }
1394
1395 ll_inode_size_lock(inode);
c1b66fcc 1396 oit.it_flags |= MDS_OPEN_BY_FID;
c139f3ce 1397 rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
d7e09d03 1398 if (rc)
34e1f2bb 1399 goto out_unlock;
e476f2e5 1400 rc = oit.it_status;
d7e09d03 1401 if (rc < 0)
34e1f2bb 1402 goto out_req_free;
d7e09d03 1403
e22fdcc8 1404 ll_release_openhandle(inode, &oit);
d7e09d03 1405
38585ccc 1406out_unlock:
d7e09d03
PT
1407 ll_inode_size_unlock(inode);
1408 ll_intent_release(&oit);
1409 ccc_inode_lsm_put(inode, lsm);
38585ccc 1410out:
0a3bdb00 1411 return rc;
d7e09d03 1412out_req_free:
8bf86fd9 1413 ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request);
d7e09d03
PT
1414 goto out;
1415}
1416
1417int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1418 struct lov_mds_md **lmmp, int *lmm_size,
1419 struct ptlrpc_request **request)
1420{
1421 struct ll_sb_info *sbi = ll_i2sbi(inode);
1422 struct mdt_body *body;
1423 struct lov_mds_md *lmm = NULL;
1424 struct ptlrpc_request *req = NULL;
1425 struct md_op_data *op_data;
1426 int rc, lmmsize;
1427
44779340 1428 rc = ll_get_default_mdsize(sbi, &lmmsize);
d7e09d03 1429 if (rc)
0a3bdb00 1430 return rc;
d7e09d03
PT
1431
1432 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1433 strlen(filename), lmmsize,
1434 LUSTRE_OPC_ANY, NULL);
1435 if (IS_ERR(op_data))
0a3bdb00 1436 return PTR_ERR(op_data);
d7e09d03
PT
1437
1438 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1439 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1440 ll_finish_md_op_data(op_data);
1441 if (rc < 0) {
2d00bd17
JP
1442 CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n",
1443 filename, rc);
34e1f2bb 1444 goto out;
d7e09d03
PT
1445 }
1446
1447 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
d7e09d03 1448
2e1b5b8b 1449 lmmsize = body->mbo_eadatasize;
d7e09d03 1450
2e1b5b8b 1451 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
e15ba45d 1452 lmmsize == 0) {
34e1f2bb
JL
1453 rc = -ENODATA;
1454 goto out;
d7e09d03
PT
1455 }
1456
1457 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
d7e09d03
PT
1458
1459 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1460 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
34e1f2bb
JL
1461 rc = -EPROTO;
1462 goto out;
d7e09d03
PT
1463 }
1464
1465 /*
1466 * This is coming from the MDS, so is probably in
1467 * little endian. We convert it to host endian before
1468 * passing it to userspace.
1469 */
1f6eaf83 1470 if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) {
5dd16419
JX
1471 int stripe_count;
1472
1473 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1474 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1475 stripe_count = 0;
1476
d7e09d03 1477 /* if function called for directory - we should
c0894c6c
OD
1478 * avoid swab not existent lsm objects
1479 */
d7e09d03
PT
1480 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1481 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
2e1b5b8b 1482 if (S_ISREG(body->mbo_mode))
d7e09d03
PT
1483 lustre_swab_lov_user_md_objects(
1484 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
5dd16419 1485 stripe_count);
d7e09d03
PT
1486 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1487 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
2e1b5b8b 1488 if (S_ISREG(body->mbo_mode))
d7e09d03
PT
1489 lustre_swab_lov_user_md_objects(
1490 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
5dd16419 1491 stripe_count);
d7e09d03
PT
1492 }
1493 }
1494
1495out:
1496 *lmmp = lmm;
1497 *lmm_size = lmmsize;
1498 *request = req;
1499 return rc;
1500}
1501
1502static int ll_lov_setea(struct inode *inode, struct file *file,
e15ba45d 1503 unsigned long arg)
d7e09d03 1504{
d467220e 1505 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
d7e09d03
PT
1506 struct lov_user_md *lump;
1507 int lum_size = sizeof(struct lov_user_md) +
1508 sizeof(struct lov_user_ost_data);
1509 int rc;
d7e09d03 1510
2eb90a75 1511 if (!capable(CFS_CAP_SYS_ADMIN))
0a3bdb00 1512 return -EPERM;
d7e09d03 1513
e958f49b 1514 lump = libcfs_kvzalloc(lum_size, GFP_NOFS);
6e16818b 1515 if (!lump)
0a3bdb00 1516 return -ENOMEM;
d7e09d03 1517
02f9c12e 1518 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
e958f49b 1519 kvfree(lump);
0a3bdb00 1520 return -EFAULT;
d7e09d03
PT
1521 }
1522
c139f3ce 1523 rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lump,
e15ba45d 1524 lum_size);
c139f3ce 1525 cl_lov_delay_create_clear(&file->f_flags);
d7e09d03 1526
e958f49b 1527 kvfree(lump);
0a3bdb00 1528 return rc;
d7e09d03
PT
1529}
1530
1531static int ll_lov_setstripe(struct inode *inode, struct file *file,
1532 unsigned long arg)
1533{
02f9c12e
OD
1534 struct lov_user_md_v3 lumv3;
1535 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1536 struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
1537 struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
1538 int lum_size, rc;
d467220e 1539 __u64 flags = FMODE_WRITE;
d7e09d03
PT
1540
1541 /* first try with v1 which is smaller than v3 */
1542 lum_size = sizeof(struct lov_user_md_v1);
1543 if (copy_from_user(lumv1, lumv1p, lum_size))
0a3bdb00 1544 return -EFAULT;
d7e09d03
PT
1545
1546 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1547 lum_size = sizeof(struct lov_user_md_v3);
1548 if (copy_from_user(&lumv3, lumv3p, lum_size))
0a3bdb00 1549 return -EFAULT;
d7e09d03
PT
1550 }
1551
c139f3ce
AV
1552 rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lumv1,
1553 lum_size);
1554 cl_lov_delay_create_clear(&file->f_flags);
d7e09d03
PT
1555 if (rc == 0) {
1556 struct lov_stripe_md *lsm;
1557 __u32 gen;
1558
1559 put_user(0, &lumv1p->lmm_stripe_count);
1560
1561 ll_layout_refresh(inode, &gen);
1562 lsm = ccc_inode_lsm_get(inode);
1563 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
e09bee34 1564 0, lsm, (void __user *)arg);
d7e09d03
PT
1565 ccc_inode_lsm_put(inode, lsm);
1566 }
0a3bdb00 1567 return rc;
d7e09d03
PT
1568}
1569
1570static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1571{
1572 struct lov_stripe_md *lsm;
1573 int rc = -ENODATA;
d7e09d03
PT
1574
1575 lsm = ccc_inode_lsm_get(inode);
6e16818b 1576 if (lsm)
d7e09d03 1577 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
e09bee34 1578 lsm, (void __user *)arg);
d7e09d03 1579 ccc_inode_lsm_put(inode, lsm);
0a3bdb00 1580 return rc;
d7e09d03
PT
1581}
1582
2d95f10e
JH
1583static int
1584ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
d7e09d03
PT
1585{
1586 struct ll_inode_info *lli = ll_i2info(inode);
1587 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
98eae5e7 1588 struct ll_grouplock grouplock;
d7e09d03 1589 int rc;
d7e09d03 1590
431b5678
PF
1591 if (arg == 0) {
1592 CWARN("group id for group lock must not be 0\n");
1593 return -EINVAL;
1594 }
1595
d7e09d03 1596 if (ll_file_nolock(file))
0a3bdb00 1597 return -EOPNOTSUPP;
d7e09d03
PT
1598
1599 spin_lock(&lli->lli_lock);
1600 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1601 CWARN("group lock already existed with gid %lu\n",
98eae5e7 1602 fd->fd_grouplock.lg_gid);
d7e09d03 1603 spin_unlock(&lli->lli_lock);
0a3bdb00 1604 return -EINVAL;
d7e09d03 1605 }
98eae5e7 1606 LASSERT(!fd->fd_grouplock.lg_lock);
d7e09d03
PT
1607 spin_unlock(&lli->lli_lock);
1608
1929c433 1609 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
d7e09d03
PT
1610 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1611 if (rc)
0a3bdb00 1612 return rc;
d7e09d03
PT
1613
1614 spin_lock(&lli->lli_lock);
1615 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1616 spin_unlock(&lli->lli_lock);
1617 CERROR("another thread just won the race\n");
1618 cl_put_grouplock(&grouplock);
0a3bdb00 1619 return -EINVAL;
d7e09d03
PT
1620 }
1621
1622 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1623 fd->fd_grouplock = grouplock;
1624 spin_unlock(&lli->lli_lock);
1625
1626 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
0a3bdb00 1627 return 0;
d7e09d03
PT
1628}
1629
920b4f2e
LC
1630static int ll_put_grouplock(struct inode *inode, struct file *file,
1631 unsigned long arg)
d7e09d03
PT
1632{
1633 struct ll_inode_info *lli = ll_i2info(inode);
1634 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
98eae5e7 1635 struct ll_grouplock grouplock;
d7e09d03
PT
1636
1637 spin_lock(&lli->lli_lock);
1638 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1639 spin_unlock(&lli->lli_lock);
1640 CWARN("no group lock held\n");
0a3bdb00 1641 return -EINVAL;
d7e09d03 1642 }
98eae5e7 1643 LASSERT(fd->fd_grouplock.lg_lock);
d7e09d03 1644
98eae5e7 1645 if (fd->fd_grouplock.lg_gid != arg) {
d7e09d03 1646 CWARN("group lock %lu doesn't match current id %lu\n",
98eae5e7 1647 arg, fd->fd_grouplock.lg_gid);
d7e09d03 1648 spin_unlock(&lli->lli_lock);
0a3bdb00 1649 return -EINVAL;
d7e09d03
PT
1650 }
1651
1652 grouplock = fd->fd_grouplock;
1653 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1654 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1655 spin_unlock(&lli->lli_lock);
1656
1657 cl_put_grouplock(&grouplock);
1658 CDEBUG(D_INFO, "group lock %lu released\n", arg);
0a3bdb00 1659 return 0;
d7e09d03
PT
1660}
1661
1662/**
1663 * Close inode open handle
1664 *
e22fdcc8 1665 * \param inode [in] inode in question
d7e09d03
PT
1666 * \param it [in,out] intent which contains open info and result
1667 *
1668 * \retval 0 success
1669 * \retval <0 failure
1670 */
e22fdcc8 1671int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
d7e09d03 1672{
d7e09d03
PT
1673 struct obd_client_handle *och;
1674 int rc;
d7e09d03
PT
1675
1676 LASSERT(inode);
1677
1678 /* Root ? Do nothing. */
f76c23da 1679 if (is_root_inode(inode))
0a3bdb00 1680 return 0;
d7e09d03
PT
1681
1682 /* No open handle to close? Move away */
1683 if (!it_disposition(it, DISP_OPEN_OPEN))
0a3bdb00 1684 return 0;
d7e09d03
PT
1685
1686 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1687
496a51bd 1688 och = kzalloc(sizeof(*och), GFP_NOFS);
34e1f2bb
JL
1689 if (!och) {
1690 rc = -ENOMEM;
1691 goto out;
1692 }
d7e09d03 1693
ea1db081 1694 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
d7e09d03
PT
1695
1696 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
48d23e61
JX
1697 inode, och, NULL);
1698out:
d7e09d03
PT
1699 /* this one is in place of ll_file_open */
1700 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
8bf86fd9 1701 ptlrpc_req_finished(it->it_request);
d7e09d03
PT
1702 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1703 }
0a3bdb00 1704 return rc;
d7e09d03
PT
1705}
1706
1707/**
1708 * Get size for inode for which FIEMAP mapping is requested.
1709 * Make the FIEMAP get_info call and returns the result.
1710 */
2d95f10e 1711static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
ebdc4fc5 1712 size_t num_bytes)
d7e09d03
PT
1713{
1714 struct obd_export *exp = ll_i2dtexp(inode);
1715 struct lov_stripe_md *lsm = NULL;
1716 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
ebdc4fc5 1717 __u32 vallen = num_bytes;
d7e09d03 1718 int rc;
d7e09d03
PT
1719
1720 /* Checks for fiemap flags */
1721 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1722 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1723 return -EBADR;
1724 }
1725
1726 /* Check for FIEMAP_FLAG_SYNC */
1727 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1728 rc = filemap_fdatawrite(inode->i_mapping);
1729 if (rc)
1730 return rc;
1731 }
1732
1733 lsm = ccc_inode_lsm_get(inode);
6e16818b 1734 if (!lsm)
d7e09d03
PT
1735 return -ENOENT;
1736
1737 /* If the stripe_count > 1 and the application does not understand
1738 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1739 */
1740 if (lsm->lsm_stripe_count > 1 &&
34e1f2bb
JL
1741 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) {
1742 rc = -EOPNOTSUPP;
1743 goto out;
1744 }
d7e09d03
PT
1745
1746 fm_key.oa.o_oi = lsm->lsm_oi;
1747 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1748
a915ffda
LD
1749 if (i_size_read(inode) == 0) {
1750 rc = ll_glimpse_size(inode);
1751 if (rc)
1752 goto out;
1753 }
1754
d7e09d03
PT
1755 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1756 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1757 /* If filesize is 0, then there would be no objects for mapping */
1758 if (fm_key.oa.o_size == 0) {
1759 fiemap->fm_mapped_extents = 0;
34e1f2bb
JL
1760 rc = 0;
1761 goto out;
d7e09d03
PT
1762 }
1763
1764 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1765
1766 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1767 fiemap, lsm);
1768 if (rc)
1769 CERROR("obd_get_info failed: rc = %d\n", rc);
1770
1771out:
1772 ccc_inode_lsm_put(inode, lsm);
0a3bdb00 1773 return rc;
d7e09d03
PT
1774}
1775
2b358b4e 1776int ll_fid2path(struct inode *inode, void __user *arg)
d7e09d03 1777{
2b358b4e
FZ
1778 struct obd_export *exp = ll_i2mdexp(inode);
1779 const struct getinfo_fid2path __user *gfin = arg;
1780 struct getinfo_fid2path *gfout;
1781 u32 pathlen;
1782 size_t outsize;
1783 int rc;
d7e09d03 1784
2eb90a75 1785 if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
d7e09d03 1786 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
0a3bdb00 1787 return -EPERM;
d7e09d03 1788
2b358b4e
FZ
1789 /* Only need to get the buflen */
1790 if (get_user(pathlen, &gfin->gf_pathlen))
0a3bdb00 1791 return -EFAULT;
d7e09d03 1792
c7b09efa
OD
1793 if (pathlen > PATH_MAX)
1794 return -EINVAL;
1795
2b358b4e
FZ
1796 outsize = sizeof(*gfout) + pathlen;
1797
496a51bd
JL
1798 gfout = kzalloc(outsize, GFP_NOFS);
1799 if (!gfout)
0a3bdb00 1800 return -ENOMEM;
2b358b4e 1801
34e1f2bb
JL
1802 if (copy_from_user(gfout, arg, sizeof(*gfout))) {
1803 rc = -EFAULT;
1804 goto gf_free;
1805 }
d7e09d03
PT
1806
1807 /* Call mdc_iocontrol */
1808 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
2b358b4e 1809 if (rc != 0)
34e1f2bb 1810 goto gf_free;
d7e09d03
PT
1811
1812 if (copy_to_user(arg, gfout, outsize))
1813 rc = -EFAULT;
1814
1815gf_free:
97903a26 1816 kfree(gfout);
0a3bdb00 1817 return rc;
d7e09d03
PT
1818}
1819
1820static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1821{
1822 struct ll_user_fiemap *fiemap_s;
1823 size_t num_bytes, ret_bytes;
1824 unsigned int extent_count;
1825 int rc = 0;
1826
1827 /* Get the extent count so we can calculate the size of
c0894c6c
OD
1828 * required fiemap buffer
1829 */
d7e09d03 1830 if (get_user(extent_count,
e15ba45d 1831 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
0a3bdb00 1832 return -EFAULT;
7bc3dfa3
VO
1833
1834 if (extent_count >=
1835 (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent))
1836 return -EINVAL;
d7e09d03
PT
1837 num_bytes = sizeof(*fiemap_s) + (extent_count *
1838 sizeof(struct ll_fiemap_extent));
1839
e958f49b 1840 fiemap_s = libcfs_kvzalloc(num_bytes, GFP_NOFS);
6e16818b 1841 if (!fiemap_s)
0a3bdb00 1842 return -ENOMEM;
d7e09d03
PT
1843
1844 /* get the fiemap value */
1845 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
34e1f2bb
JL
1846 sizeof(*fiemap_s))) {
1847 rc = -EFAULT;
1848 goto error;
1849 }
d7e09d03
PT
1850
1851 /* If fm_extent_count is non-zero, read the first extent since
1852 * it is used to calculate end_offset and device from previous
c0894c6c
OD
1853 * fiemap call.
1854 */
d7e09d03
PT
1855 if (extent_count) {
1856 if (copy_from_user(&fiemap_s->fm_extents[0],
e15ba45d
OD
1857 (char __user *)arg + sizeof(*fiemap_s),
1858 sizeof(struct ll_fiemap_extent))) {
34e1f2bb
JL
1859 rc = -EFAULT;
1860 goto error;
1861 }
d7e09d03
PT
1862 }
1863
1864 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1865 if (rc)
34e1f2bb 1866 goto error;
d7e09d03
PT
1867
1868 ret_bytes = sizeof(struct ll_user_fiemap);
1869
1870 if (extent_count != 0)
1871 ret_bytes += (fiemap_s->fm_mapped_extents *
1872 sizeof(struct ll_fiemap_extent));
1873
02f9c12e 1874 if (copy_to_user((void __user *)arg, fiemap_s, ret_bytes))
d7e09d03
PT
1875 rc = -EFAULT;
1876
1877error:
e958f49b 1878 kvfree(fiemap_s);
0a3bdb00 1879 return rc;
d7e09d03
PT
1880}
1881
1882/*
1883 * Read the data_version for inode.
1884 *
1885 * This value is computed using stripe object version on OST.
1886 * Version is computed using server side locking.
1887 *
e1798006
JX
1888 * @param sync if do sync on the OST side;
1889 * 0: no sync
1890 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1891 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
d7e09d03 1892 */
e1798006 1893int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
d7e09d03
PT
1894{
1895 struct lov_stripe_md *lsm = NULL;
1896 struct ll_sb_info *sbi = ll_i2sbi(inode);
1897 struct obdo *obdo = NULL;
1898 int rc;
d7e09d03
PT
1899
1900 /* If no stripe, we consider version is 0. */
1901 lsm = ccc_inode_lsm_get(inode);
5dd16419 1902 if (!lsm_has_objects(lsm)) {
d7e09d03
PT
1903 *data_version = 0;
1904 CDEBUG(D_INODE, "No object for inode\n");
34e1f2bb
JL
1905 rc = 0;
1906 goto out;
d7e09d03
PT
1907 }
1908
496a51bd
JL
1909 obdo = kzalloc(sizeof(*obdo), GFP_NOFS);
1910 if (!obdo) {
34e1f2bb
JL
1911 rc = -ENOMEM;
1912 goto out;
1913 }
d7e09d03 1914
e1798006 1915 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags);
5dd16419 1916 if (rc == 0) {
d7e09d03
PT
1917 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1918 rc = -EOPNOTSUPP;
1919 else
1920 *data_version = obdo->o_data_version;
1921 }
1922
97903a26 1923 kfree(obdo);
5dd16419 1924out:
d7e09d03 1925 ccc_inode_lsm_put(inode, lsm);
0a3bdb00 1926 return rc;
d7e09d03
PT
1927}
1928
48d23e61
JX
1929/*
1930 * Trigger a HSM release request for the provided inode.
1931 */
1932int ll_hsm_release(struct inode *inode)
1933{
1934 struct cl_env_nest nest;
1935 struct lu_env *env;
1936 struct obd_client_handle *och = NULL;
1937 __u64 data_version = 0;
1938 int rc;
1939
48d23e61
JX
1940 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1941 ll_get_fsname(inode->i_sb, NULL, 0),
1942 PFID(&ll_i2info(inode)->lli_fid));
1943
1944 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
34e1f2bb
JL
1945 if (IS_ERR(och)) {
1946 rc = PTR_ERR(och);
1947 goto out;
1948 }
48d23e61
JX
1949
1950 /* Grab latest data_version and [am]time values */
e1798006 1951 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
48d23e61 1952 if (rc != 0)
34e1f2bb 1953 goto out;
48d23e61
JX
1954
1955 env = cl_env_nested_get(&nest);
34e1f2bb
JL
1956 if (IS_ERR(env)) {
1957 rc = PTR_ERR(env);
1958 goto out;
1959 }
48d23e61 1960
d2995737 1961 ll_merge_attr(env, inode);
48d23e61
JX
1962 cl_env_nested_put(&nest, env);
1963
1964 /* Release the file.
1965 * NB: lease lock handle is released in mdc_hsm_release_pack() because
c0894c6c
OD
1966 * we still need it to pack l_remote_handle to MDT.
1967 */
48d23e61
JX
1968 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1969 &data_version);
1970 och = NULL;
1971
48d23e61 1972out:
6e16818b 1973 if (och && !IS_ERR(och)) /* close the file */
48d23e61
JX
1974 ll_lease_close(och, inode, NULL);
1975
1976 return rc;
1977}
1978
d7e09d03
PT
1979struct ll_swap_stack {
1980 struct iattr ia1, ia2;
1981 __u64 dv1, dv2;
1982 struct inode *inode1, *inode2;
1983 bool check_dv1, check_dv2;
1984};
1985
1986static int ll_swap_layouts(struct file *file1, struct file *file2,
1987 struct lustre_swap_layouts *lsl)
1988{
1989 struct mdc_swap_layouts msl;
1990 struct md_op_data *op_data;
1991 __u32 gid;
1992 __u64 dv;
1993 struct ll_swap_stack *llss = NULL;
1994 int rc;
1995
496a51bd
JL
1996 llss = kzalloc(sizeof(*llss), GFP_NOFS);
1997 if (!llss)
0a3bdb00 1998 return -ENOMEM;
d7e09d03 1999
2a8a3597
AV
2000 llss->inode1 = file_inode(file1);
2001 llss->inode2 = file_inode(file2);
d7e09d03 2002
34e1f2bb
JL
2003 if (!S_ISREG(llss->inode2->i_mode)) {
2004 rc = -EINVAL;
2005 goto free;
2006 }
d7e09d03 2007
9c5fb72c 2008 if (inode_permission(llss->inode1, MAY_WRITE) ||
34e1f2bb
JL
2009 inode_permission(llss->inode2, MAY_WRITE)) {
2010 rc = -EPERM;
2011 goto free;
2012 }
d7e09d03 2013
34e1f2bb
JL
2014 if (llss->inode2->i_sb != llss->inode1->i_sb) {
2015 rc = -EXDEV;
2016 goto free;
2017 }
d7e09d03
PT
2018
2019 /* we use 2 bool because it is easier to swap than 2 bits */
2020 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2021 llss->check_dv1 = true;
2022
2023 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2024 llss->check_dv2 = true;
2025
2026 /* we cannot use lsl->sl_dvX directly because we may swap them */
2027 llss->dv1 = lsl->sl_dv1;
2028 llss->dv2 = lsl->sl_dv2;
2029
2030 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
34e1f2bb
JL
2031 if (rc == 0) /* same file, done! */ {
2032 rc = 0;
2033 goto free;
2034 }
d7e09d03
PT
2035
2036 if (rc < 0) { /* sequentialize it */
2037 swap(llss->inode1, llss->inode2);
2038 swap(file1, file2);
2039 swap(llss->dv1, llss->dv2);
2040 swap(llss->check_dv1, llss->check_dv2);
2041 }
2042
2043 gid = lsl->sl_gid;
2044 if (gid != 0) { /* application asks to flush dirty cache */
2045 rc = ll_get_grouplock(llss->inode1, file1, gid);
2046 if (rc < 0)
34e1f2bb 2047 goto free;
d7e09d03
PT
2048
2049 rc = ll_get_grouplock(llss->inode2, file2, gid);
2050 if (rc < 0) {
2051 ll_put_grouplock(llss->inode1, file1, gid);
34e1f2bb 2052 goto free;
d7e09d03
PT
2053 }
2054 }
2055
2056 /* to be able to restore mtime and atime after swap
c0894c6c
OD
2057 * we need to first save them
2058 */
d7e09d03
PT
2059 if (lsl->sl_flags &
2060 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2061 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2062 llss->ia1.ia_atime = llss->inode1->i_atime;
2063 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2064 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2065 llss->ia2.ia_atime = llss->inode2->i_atime;
2066 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2067 }
2068
d0a0acc3 2069 /* ultimate check, before swapping the layouts we check if
c0894c6c
OD
2070 * dataversion has changed (if requested)
2071 */
d7e09d03
PT
2072 if (llss->check_dv1) {
2073 rc = ll_data_version(llss->inode1, &dv, 0);
2074 if (rc)
34e1f2bb
JL
2075 goto putgl;
2076 if (dv != llss->dv1) {
2077 rc = -EAGAIN;
2078 goto putgl;
2079 }
d7e09d03
PT
2080 }
2081
2082 if (llss->check_dv2) {
2083 rc = ll_data_version(llss->inode2, &dv, 0);
2084 if (rc)
34e1f2bb
JL
2085 goto putgl;
2086 if (dv != llss->dv2) {
2087 rc = -EAGAIN;
2088 goto putgl;
2089 }
d7e09d03
PT
2090 }
2091
2092 /* struct md_op_data is used to send the swap args to the mdt
2093 * only flags is missing, so we use struct mdc_swap_layouts
c0894c6c
OD
2094 * through the md_op_data->op_data
2095 */
d7e09d03 2096 /* flags from user space have to be converted before they are send to
c0894c6c
OD
2097 * server, no flag is sent today, they are only used on the client
2098 */
d7e09d03
PT
2099 msl.msl_flags = 0;
2100 rc = -ENOMEM;
2101 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2102 0, LUSTRE_OPC_ANY, &msl);
34e1f2bb
JL
2103 if (IS_ERR(op_data)) {
2104 rc = PTR_ERR(op_data);
2105 goto free;
2106 }
79a8726a
JH
2107
2108 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2109 sizeof(*op_data), op_data, NULL);
2110 ll_finish_md_op_data(op_data);
d7e09d03
PT
2111
2112putgl:
2113 if (gid != 0) {
2114 ll_put_grouplock(llss->inode2, file2, gid);
2115 ll_put_grouplock(llss->inode1, file1, gid);
2116 }
2117
2118 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2119 if (rc != 0)
34e1f2bb 2120 goto free;
d7e09d03
PT
2121
2122 /* clear useless flags */
2123 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2124 llss->ia1.ia_valid &= ~ATTR_MTIME;
2125 llss->ia2.ia_valid &= ~ATTR_MTIME;
2126 }
2127
2128 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2129 llss->ia1.ia_valid &= ~ATTR_ATIME;
2130 llss->ia2.ia_valid &= ~ATTR_ATIME;
2131 }
2132
2133 /* update time if requested */
2134 rc = 0;
2135 if (llss->ia2.ia_valid != 0) {
5955102c 2136 inode_lock(llss->inode1);
b583043e 2137 rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
5955102c 2138 inode_unlock(llss->inode1);
d7e09d03
PT
2139 }
2140
2141 if (llss->ia1.ia_valid != 0) {
2142 int rc1;
2143
5955102c 2144 inode_lock(llss->inode2);
b583043e 2145 rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
5955102c 2146 inode_unlock(llss->inode2);
d7e09d03
PT
2147 if (rc == 0)
2148 rc = rc1;
2149 }
2150
2151free:
e6b9a3b2 2152 kfree(llss);
d7e09d03 2153
0a3bdb00 2154 return rc;
d7e09d03
PT
2155}
2156
a720b790
JL
2157static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2158{
2159 struct md_op_data *op_data;
2160 int rc;
2161
2742c75e
BF
2162 /* Detect out-of range masks */
2163 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2164 return -EINVAL;
2165
a720b790 2166 /* Non-root users are forbidden to set or clear flags which are
c0894c6c
OD
2167 * NOT defined in HSM_USER_MASK.
2168 */
a720b790 2169 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2eb90a75 2170 !capable(CFS_CAP_SYS_ADMIN))
a720b790
JL
2171 return -EPERM;
2172
2742c75e
BF
2173 /* Detect out-of range archive id */
2174 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2175 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2176 return -EINVAL;
2177
a720b790
JL
2178 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2179 LUSTRE_OPC_ANY, hss);
2180 if (IS_ERR(op_data))
2181 return PTR_ERR(op_data);
2182
2183 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2184 sizeof(*op_data), op_data, NULL);
2185
2186 ll_finish_md_op_data(op_data);
2187
2188 return rc;
2189}
2190
2191static int ll_hsm_import(struct inode *inode, struct file *file,
2192 struct hsm_user_import *hui)
2193{
2194 struct hsm_state_set *hss = NULL;
2195 struct iattr *attr = NULL;
2196 int rc;
2197
a720b790
JL
2198 if (!S_ISREG(inode->i_mode))
2199 return -EINVAL;
2200
2201 /* set HSM flags */
496a51bd 2202 hss = kzalloc(sizeof(*hss), GFP_NOFS);
e6b9a3b2
JL
2203 if (!hss)
2204 return -ENOMEM;
a720b790
JL
2205
2206 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2207 hss->hss_archive_id = hui->hui_archive_id;
2208 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2209 rc = ll_hsm_state_set(inode, hss);
2210 if (rc != 0)
e6b9a3b2 2211 goto free_hss;
a720b790 2212
496a51bd
JL
2213 attr = kzalloc(sizeof(*attr), GFP_NOFS);
2214 if (!attr) {
34e1f2bb 2215 rc = -ENOMEM;
e6b9a3b2 2216 goto free_hss;
34e1f2bb 2217 }
a720b790
JL
2218
2219 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2220 attr->ia_mode |= S_IFREG;
2221 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2222 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2223 attr->ia_size = hui->hui_size;
2224 attr->ia_mtime.tv_sec = hui->hui_mtime;
2225 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2226 attr->ia_atime.tv_sec = hui->hui_atime;
2227 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2228
2229 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2230 ATTR_UID | ATTR_GID |
2231 ATTR_MTIME | ATTR_MTIME_SET |
2232 ATTR_ATIME | ATTR_ATIME_SET;
2233
5955102c 2234 inode_lock(inode);
b6ee56fe 2235
b583043e 2236 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
a720b790
JL
2237 if (rc == -ENODATA)
2238 rc = 0;
2239
5955102c 2240 inode_unlock(inode);
b6ee56fe 2241
e6b9a3b2
JL
2242 kfree(attr);
2243free_hss:
2244 kfree(hss);
a720b790
JL
2245 return rc;
2246}
2247
2d95f10e
JH
2248static long
2249ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
d7e09d03 2250{
2a8a3597 2251 struct inode *inode = file_inode(file);
d7e09d03
PT
2252 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2253 int flags, rc;
d7e09d03 2254
97a075cd
JN
2255 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n",
2256 PFID(ll_inode2fid(inode)), inode, cmd);
d7e09d03
PT
2257 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2258
2259 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2260 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
0a3bdb00 2261 return -ENOTTY;
d7e09d03 2262
a58a38ac 2263 switch (cmd) {
d7e09d03
PT
2264 case LL_IOC_GETFLAGS:
2265 /* Get the current value of the file flags */
02f9c12e 2266 return put_user(fd->fd_flags, (int __user *)arg);
d7e09d03
PT
2267 case LL_IOC_SETFLAGS:
2268 case LL_IOC_CLRFLAGS:
2269 /* Set or clear specific file flags */
2270 /* XXX This probably needs checks to ensure the flags are
2271 * not abused, and to handle any flag side effects.
2272 */
02f9c12e 2273 if (get_user(flags, (int __user *)arg))
0a3bdb00 2274 return -EFAULT;
d7e09d03
PT
2275
2276 if (cmd == LL_IOC_SETFLAGS) {
2277 if ((flags & LL_FILE_IGNORE_LOCK) &&
2278 !(file->f_flags & O_DIRECT)) {
2d00bd17
JP
2279 CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
2280 current->comm);
0a3bdb00 2281 return -EINVAL;
d7e09d03
PT
2282 }
2283
2284 fd->fd_flags |= flags;
2285 } else {
2286 fd->fd_flags &= ~flags;
2287 }
0a3bdb00 2288 return 0;
d7e09d03 2289 case LL_IOC_LOV_SETSTRIPE:
0a3bdb00 2290 return ll_lov_setstripe(inode, file, arg);
d7e09d03 2291 case LL_IOC_LOV_SETEA:
0a3bdb00 2292 return ll_lov_setea(inode, file, arg);
d7e09d03
PT
2293 case LL_IOC_LOV_SWAP_LAYOUTS: {
2294 struct file *file2;
2295 struct lustre_swap_layouts lsl;
2296
02f9c12e
OD
2297 if (copy_from_user(&lsl, (char __user *)arg,
2298 sizeof(struct lustre_swap_layouts)))
0a3bdb00 2299 return -EFAULT;
d7e09d03
PT
2300
2301 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
0a3bdb00 2302 return -EPERM;
d7e09d03
PT
2303
2304 file2 = fget(lsl.sl_fd);
6e16818b 2305 if (!file2)
0a3bdb00 2306 return -EBADF;
d7e09d03
PT
2307
2308 rc = -EPERM;
2309 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2310 rc = ll_swap_layouts(file, file2, &lsl);
2311 fput(file2);
0a3bdb00 2312 return rc;
d7e09d03
PT
2313 }
2314 case LL_IOC_LOV_GETSTRIPE:
0a3bdb00 2315 return ll_lov_getstripe(inode, arg);
d7e09d03 2316 case LL_IOC_RECREATE_OBJ:
0a3bdb00 2317 return ll_lov_recreate_obj(inode, arg);
d7e09d03 2318 case LL_IOC_RECREATE_FID:
0a3bdb00 2319 return ll_lov_recreate_fid(inode, arg);
d7e09d03 2320 case FSFILT_IOC_FIEMAP:
0a3bdb00 2321 return ll_ioctl_fiemap(inode, arg);
d7e09d03
PT
2322 case FSFILT_IOC_GETFLAGS:
2323 case FSFILT_IOC_SETFLAGS:
0a3bdb00 2324 return ll_iocontrol(inode, file, cmd, arg);
d7e09d03
PT
2325 case FSFILT_IOC_GETVERSION_OLD:
2326 case FSFILT_IOC_GETVERSION:
02f9c12e 2327 return put_user(inode->i_generation, (int __user *)arg);
d7e09d03 2328 case LL_IOC_GROUP_LOCK:
0a3bdb00 2329 return ll_get_grouplock(inode, file, arg);
d7e09d03 2330 case LL_IOC_GROUP_UNLOCK:
0a3bdb00 2331 return ll_put_grouplock(inode, file, arg);
d7e09d03 2332 case IOC_OBD_STATFS:
4c6243ec 2333 return ll_obd_statfs(inode, (void __user *)arg);
d7e09d03
PT
2334
2335 /* We need to special case any other ioctls we want to handle,
2336 * to send them to the MDS/OST as appropriate and to properly
2337 * network encode the arg field.
2338 case FSFILT_IOC_SETVERSION_OLD:
2339 case FSFILT_IOC_SETVERSION:
2340 */
2341 case LL_IOC_FLUSHCTX:
0a3bdb00 2342 return ll_flush_ctx(inode);
d7e09d03 2343 case LL_IOC_PATH2FID: {
02f9c12e 2344 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
d7e09d03 2345 sizeof(struct lu_fid)))
0a3bdb00 2346 return -EFAULT;
d7e09d03 2347
0a3bdb00 2348 return 0;
d7e09d03
PT
2349 }
2350 case OBD_IOC_FID2PATH:
61dad0ba 2351 return ll_fid2path(inode, (void __user *)arg);
d7e09d03
PT
2352 case LL_IOC_DATA_VERSION: {
2353 struct ioc_data_version idv;
2354 int rc;
2355
02f9c12e 2356 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
0a3bdb00 2357 return -EFAULT;
d7e09d03 2358
e1798006
JX
2359 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2360 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
02f9c12e
OD
2361 if (rc == 0 && copy_to_user((char __user *)arg, &idv,
2362 sizeof(idv)))
0a3bdb00 2363 return -EFAULT;
d7e09d03 2364
0a3bdb00 2365 return rc;
d7e09d03
PT
2366 }
2367
2368 case LL_IOC_GET_MDTIDX: {
2369 int mdtidx;
2370
2371 mdtidx = ll_get_mdt_idx(inode);
2372 if (mdtidx < 0)
0a3bdb00 2373 return mdtidx;
d7e09d03 2374
02f9c12e 2375 if (put_user(mdtidx, (int __user *)arg))
0a3bdb00 2376 return -EFAULT;
d7e09d03 2377
0a3bdb00 2378 return 0;
d7e09d03
PT
2379 }
2380 case OBD_IOC_GETDTNAME:
2381 case OBD_IOC_GETMDNAME:
0a3bdb00 2382 return ll_get_obd_name(inode, cmd, arg);
d7e09d03
PT
2383 case LL_IOC_HSM_STATE_GET: {
2384 struct md_op_data *op_data;
2385 struct hsm_user_state *hus;
2386 int rc;
2387
496a51bd
JL
2388 hus = kzalloc(sizeof(*hus), GFP_NOFS);
2389 if (!hus)
0a3bdb00 2390 return -ENOMEM;
d7e09d03
PT
2391
2392 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2393 LUSTRE_OPC_ANY, hus);
79a8726a 2394 if (IS_ERR(op_data)) {
97903a26 2395 kfree(hus);
0a3bdb00 2396 return PTR_ERR(op_data);
d7e09d03
PT
2397 }
2398
2399 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2400 op_data, NULL);
2401
02f9c12e 2402 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
d7e09d03
PT
2403 rc = -EFAULT;
2404
2405 ll_finish_md_op_data(op_data);
97903a26 2406 kfree(hus);
0a3bdb00 2407 return rc;
d7e09d03
PT
2408 }
2409 case LL_IOC_HSM_STATE_SET: {
d7e09d03
PT
2410 struct hsm_state_set *hss;
2411 int rc;
2412
02f9c12e 2413 hss = memdup_user((char __user *)arg, sizeof(*hss));
0c027bc3
AH
2414 if (IS_ERR(hss))
2415 return PTR_ERR(hss);
d7e09d03 2416
a720b790 2417 rc = ll_hsm_state_set(inode, hss);
d7e09d03 2418
97903a26 2419 kfree(hss);
0a3bdb00 2420 return rc;
d7e09d03
PT
2421 }
2422 case LL_IOC_HSM_ACTION: {
2423 struct md_op_data *op_data;
2424 struct hsm_current_action *hca;
2425 int rc;
2426
496a51bd
JL
2427 hca = kzalloc(sizeof(*hca), GFP_NOFS);
2428 if (!hca)
0a3bdb00 2429 return -ENOMEM;
d7e09d03
PT
2430
2431 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2432 LUSTRE_OPC_ANY, hca);
79a8726a 2433 if (IS_ERR(op_data)) {
97903a26 2434 kfree(hca);
0a3bdb00 2435 return PTR_ERR(op_data);
d7e09d03
PT
2436 }
2437
2438 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2439 op_data, NULL);
2440
02f9c12e 2441 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
d7e09d03
PT
2442 rc = -EFAULT;
2443
2444 ll_finish_md_op_data(op_data);
97903a26 2445 kfree(hca);
0a3bdb00 2446 return rc;
d7e09d03 2447 }
d3a8a4e2
JX
2448 case LL_IOC_SET_LEASE: {
2449 struct ll_inode_info *lli = ll_i2info(inode);
2450 struct obd_client_handle *och = NULL;
2451 bool lease_broken;
2452 fmode_t mode = 0;
2453
2454 switch (arg) {
2455 case F_WRLCK:
2456 if (!(file->f_mode & FMODE_WRITE))
2457 return -EPERM;
2458 mode = FMODE_WRITE;
2459 break;
2460 case F_RDLCK:
2461 if (!(file->f_mode & FMODE_READ))
2462 return -EPERM;
2463 mode = FMODE_READ;
2464 break;
2465 case F_UNLCK:
2466 mutex_lock(&lli->lli_och_mutex);
6e16818b 2467 if (fd->fd_lease_och) {
d3a8a4e2
JX
2468 och = fd->fd_lease_och;
2469 fd->fd_lease_och = NULL;
2470 }
2471 mutex_unlock(&lli->lli_och_mutex);
2472
6e16818b 2473 if (och) {
d3a8a4e2 2474 mode = och->och_flags &
cd94f231 2475 (FMODE_READ | FMODE_WRITE);
d3a8a4e2
JX
2476 rc = ll_lease_close(och, inode, &lease_broken);
2477 if (rc == 0 && lease_broken)
2478 mode = 0;
2479 } else {
2480 rc = -ENOLCK;
2481 }
2482
2483 /* return the type of lease or error */
2484 return rc < 0 ? rc : (int)mode;
2485 default:
2486 return -EINVAL;
2487 }
2488
2489 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2490
2491 /* apply for lease */
48d23e61 2492 och = ll_lease_open(inode, file, mode, 0);
d3a8a4e2
JX
2493 if (IS_ERR(och))
2494 return PTR_ERR(och);
2495
2496 rc = 0;
2497 mutex_lock(&lli->lli_och_mutex);
6e16818b 2498 if (!fd->fd_lease_och) {
d3a8a4e2
JX
2499 fd->fd_lease_och = och;
2500 och = NULL;
2501 }
2502 mutex_unlock(&lli->lli_och_mutex);
6e16818b 2503 if (och) {
d3a8a4e2
JX
2504 /* impossible now that only excl is supported for now */
2505 ll_lease_close(och, inode, &lease_broken);
2506 rc = -EBUSY;
2507 }
2508 return rc;
2509 }
2510 case LL_IOC_GET_LEASE: {
2511 struct ll_inode_info *lli = ll_i2info(inode);
2512 struct ldlm_lock *lock = NULL;
2513
2514 rc = 0;
2515 mutex_lock(&lli->lli_och_mutex);
6e16818b 2516 if (fd->fd_lease_och) {
d3a8a4e2
JX
2517 struct obd_client_handle *och = fd->fd_lease_och;
2518
2519 lock = ldlm_handle2lock(&och->och_lease_handle);
6e16818b 2520 if (lock) {
d3a8a4e2
JX
2521 lock_res_and_lock(lock);
2522 if (!ldlm_is_cancel(lock))
2523 rc = och->och_flags &
2524 (FMODE_READ | FMODE_WRITE);
2525 unlock_res_and_lock(lock);
ead02808 2526 LDLM_LOCK_PUT(lock);
d3a8a4e2
JX
2527 }
2528 }
2529 mutex_unlock(&lli->lli_och_mutex);
a720b790
JL
2530 return rc;
2531 }
2532 case LL_IOC_HSM_IMPORT: {
2533 struct hsm_user_import *hui;
2534
02f9c12e 2535 hui = memdup_user((void __user *)arg, sizeof(*hui));
0c027bc3
AH
2536 if (IS_ERR(hui))
2537 return PTR_ERR(hui);
a720b790
JL
2538
2539 rc = ll_hsm_import(inode, file, hui);
d3a8a4e2 2540
97903a26 2541 kfree(hui);
d3a8a4e2
JX
2542 return rc;
2543 }
d7e09d03
PT
2544 default: {
2545 int err;
2546
1f6eaf83
JL
2547 if (ll_iocontrol_call(inode, file, cmd, arg, &err) ==
2548 LLIOC_STOP)
0a3bdb00 2549 return err;
d7e09d03 2550
0a3bdb00 2551 return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
e09bee34 2552 (void __user *)arg);
d7e09d03
PT
2553 }
2554 }
2555}
2556
2d95f10e 2557static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
d7e09d03 2558{
2a8a3597 2559 struct inode *inode = file_inode(file);
d7e09d03
PT
2560 loff_t retval, eof = 0;
2561
d7e09d03
PT
2562 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2563 (origin == SEEK_CUR) ? file->f_pos : 0);
97a075cd
JN
2564 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2565 PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
d7e09d03
PT
2566 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2567
2568 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2569 retval = ll_glimpse_size(inode);
2570 if (retval != 0)
0a3bdb00 2571 return retval;
d7e09d03
PT
2572 eof = i_size_read(inode);
2573 }
2574
6f014339 2575 retval = generic_file_llseek_size(file, offset, origin,
d7e09d03 2576 ll_file_maxbytes(inode), eof);
0a3bdb00 2577 return retval;
d7e09d03
PT
2578}
2579
2d95f10e 2580static int ll_flush(struct file *file, fl_owner_t id)
d7e09d03 2581{
2a8a3597 2582 struct inode *inode = file_inode(file);
d7e09d03
PT
2583 struct ll_inode_info *lli = ll_i2info(inode);
2584 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2585 int rc, err;
2586
2587 LASSERT(!S_ISDIR(inode->i_mode));
2588
2589 /* catch async errors that were recorded back when async writeback
c0894c6c
OD
2590 * failed for pages in this mapping.
2591 */
d7e09d03
PT
2592 rc = lli->lli_async_rc;
2593 lli->lli_async_rc = 0;
79496845 2594 if (lli->lli_clob) {
2595 err = lov_read_and_clear_async_rc(lli->lli_clob);
2596 if (!rc)
2597 rc = err;
2598 }
d7e09d03 2599
c0894c6c
OD
2600 /* The application has been told about write failure already.
2601 * Do not report failure again.
2602 */
d7e09d03
PT
2603 if (fd->fd_write_failed)
2604 return 0;
2605 return rc ? -EIO : 0;
2606}
2607
2608/**
2609 * Called to make sure a portion of file has been written out.
05289927 2610 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
d7e09d03
PT
2611 *
2612 * Return how many pages have been written.
2613 */
2614int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
65fb55d1 2615 enum cl_fsync_mode mode, int ignore_layout)
d7e09d03
PT
2616{
2617 struct cl_env_nest nest;
2618 struct lu_env *env;
2619 struct cl_io *io;
d7e09d03
PT
2620 struct cl_fsync_io *fio;
2621 int result;
d7e09d03
PT
2622
2623 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2624 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
0a3bdb00 2625 return -EINVAL;
d7e09d03
PT
2626
2627 env = cl_env_nested_get(&nest);
2628 if (IS_ERR(env))
0a3bdb00 2629 return PTR_ERR(env);
d7e09d03 2630
9acc4500 2631 io = vvp_env_thread_io(env);
1929c433 2632 io->ci_obj = ll_i2info(inode)->lli_clob;
65fb55d1 2633 io->ci_ignore_layout = ignore_layout;
d7e09d03
PT
2634
2635 /* initialize parameters for sync */
2636 fio = &io->u.ci_fsync;
d7e09d03
PT
2637 fio->fi_start = start;
2638 fio->fi_end = end;
2639 fio->fi_fid = ll_inode2fid(inode);
2640 fio->fi_mode = mode;
2641 fio->fi_nr_written = 0;
2642
2643 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2644 result = cl_io_loop(env, io);
2645 else
2646 result = io->ci_result;
2647 if (result == 0)
2648 result = fio->fi_nr_written;
2649 cl_io_fini(env, io);
2650 cl_env_nested_put(&nest, env);
2651
0a3bdb00 2652 return result;
d7e09d03
PT
2653}
2654
d7e09d03
PT
2655int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2656{
2a8a3597 2657 struct inode *inode = file_inode(file);
d7e09d03
PT
2658 struct ll_inode_info *lli = ll_i2info(inode);
2659 struct ptlrpc_request *req;
d7e09d03 2660 int rc, err;
d7e09d03 2661
97a075cd
JN
2662 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2663 PFID(ll_inode2fid(inode)), inode);
d7e09d03
PT
2664 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2665
2666 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
5955102c 2667 inode_lock(inode);
d7e09d03
PT
2668
2669 /* catch async errors that were recorded back when async writeback
c0894c6c
OD
2670 * failed for pages in this mapping.
2671 */
d7e09d03
PT
2672 if (!S_ISDIR(inode->i_mode)) {
2673 err = lli->lli_async_rc;
2674 lli->lli_async_rc = 0;
2675 if (rc == 0)
2676 rc = err;
2677 err = lov_read_and_clear_async_rc(lli->lli_clob);
2678 if (rc == 0)
2679 rc = err;
2680 }
2681
ef2e0f55 2682 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
d7e09d03
PT
2683 if (!rc)
2684 rc = err;
2685 if (!err)
2686 ptlrpc_req_finished(req);
2687
8d97deb9 2688 if (S_ISREG(inode->i_mode)) {
d7e09d03
PT
2689 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2690
05289927 2691 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
d7e09d03
PT
2692 if (rc == 0 && err < 0)
2693 rc = err;
2694 if (rc < 0)
2695 fd->fd_write_failed = true;
2696 else
2697 fd->fd_write_failed = false;
2698 }
2699
5955102c 2700 inode_unlock(inode);
0a3bdb00 2701 return rc;
d7e09d03
PT
2702}
2703
2d95f10e
JH
2704static int
2705ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
d7e09d03 2706{
2a8a3597 2707 struct inode *inode = file_inode(file);
d7e09d03 2708 struct ll_sb_info *sbi = ll_i2sbi(inode);
f2145eae
BK
2709 struct ldlm_enqueue_info einfo = {
2710 .ei_type = LDLM_FLOCK,
2711 .ei_cb_cp = ldlm_flock_completion_ast,
2712 .ei_cbdata = file_lock,
2713 };
d7e09d03
PT
2714 struct md_op_data *op_data;
2715 struct lustre_handle lockh = {0};
8369cfff 2716 ldlm_policy_data_t flock = { {0} };
c68c3fa4 2717 int fl_type = file_lock->fl_type;
875332d4 2718 __u64 flags = 0;
d7e09d03
PT
2719 int rc;
2720 int rc2 = 0;
d7e09d03 2721
97a075cd
JN
2722 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2723 PFID(ll_inode2fid(inode)), file_lock);
d7e09d03
PT
2724
2725 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2726
130d1f95 2727 if (file_lock->fl_flags & FL_FLOCK)
d7e09d03 2728 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
130d1f95 2729 else if (!(file_lock->fl_flags & FL_POSIX))
0a3bdb00 2730 return -EINVAL;
130d1f95
JL
2731
2732 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
d7e09d03 2733 flock.l_flock.pid = file_lock->fl_pid;
130d1f95
JL
2734 flock.l_flock.start = file_lock->fl_start;
2735 flock.l_flock.end = file_lock->fl_end;
d7e09d03
PT
2736
2737 /* Somewhat ugly workaround for svc lockd.
2738 * lockd installs custom fl_lmops->lm_compare_owner that checks
2739 * for the fl_owner to be the same (which it always is on local node
2740 * I guess between lockd processes) and then compares pid.
2741 * As such we assign pid to the owner field to make it all work,
2742 * conflict with normal locks is unlikely since pid space and
c0894c6c
OD
2743 * pointer space for current->files are not intersecting
2744 */
d7e09d03
PT
2745 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2746 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2747
c68c3fa4 2748 switch (fl_type) {
d7e09d03
PT
2749 case F_RDLCK:
2750 einfo.ei_mode = LCK_PR;
2751 break;
2752 case F_UNLCK:
2753 /* An unlock request may or may not have any relation to
2754 * existing locks so we may not be able to pass a lock handle
2755 * via a normal ldlm_lock_cancel() request. The request may even
2756 * unlock a byte range in the middle of an existing lock. In
2757 * order to process an unlock request we need all of the same
2758 * information that is given with a normal read or write record
2759 * lock request. To avoid creating another ldlm unlock (cancel)
c0894c6c
OD
2760 * message we'll treat a LCK_NL flock request as an unlock.
2761 */
d7e09d03
PT
2762 einfo.ei_mode = LCK_NL;
2763 break;
2764 case F_WRLCK:
2765 einfo.ei_mode = LCK_PW;
2766 break;
2767 default:
c68c3fa4 2768 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
0a3bdb00 2769 return -ENOTSUPP;
d7e09d03
PT
2770 }
2771
2772 switch (cmd) {
2773 case F_SETLKW:
2faedcd5 2774#ifdef F_SETLKW64
d7e09d03 2775 case F_SETLKW64:
2faedcd5 2776#endif
d7e09d03
PT
2777 flags = 0;
2778 break;
2779 case F_SETLK:
2faedcd5 2780#ifdef F_SETLK64
d7e09d03 2781 case F_SETLK64:
2faedcd5 2782#endif
d7e09d03
PT
2783 flags = LDLM_FL_BLOCK_NOWAIT;
2784 break;
2785 case F_GETLK:
2faedcd5 2786#ifdef F_GETLK64
d7e09d03 2787 case F_GETLK64:
2faedcd5 2788#endif
d7e09d03 2789 flags = LDLM_FL_TEST_LOCK;
d7e09d03
PT
2790 break;
2791 default:
2792 CERROR("unknown fcntl lock command: %d\n", cmd);
0a3bdb00 2793 return -EINVAL;
d7e09d03
PT
2794 }
2795
c68c3fa4
VF
2796 /*
2797 * Save the old mode so that if the mode in the lock changes we
2798 * can decrement the appropriate reader or writer refcount.
2799 */
2800 file_lock->fl_type = einfo.ei_mode;
2801
d7e09d03
PT
2802 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2803 LUSTRE_OPC_ANY, NULL);
2804 if (IS_ERR(op_data))
0a3bdb00 2805 return PTR_ERR(op_data);
d7e09d03 2806
97a075cd
JN
2807 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
2808 PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
2809 einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
d7e09d03 2810
70a251f6
JH
2811 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2812 flags);
d7e09d03 2813
c68c3fa4
VF
2814 /* Restore the file lock type if not TEST lock. */
2815 if (!(flags & LDLM_FL_TEST_LOCK))
2816 file_lock->fl_type = fl_type;
d7e09d03 2817
4f656367 2818 if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
d7e09d03 2819 !(flags & LDLM_FL_TEST_LOCK))
4f656367 2820 rc2 = locks_lock_file_wait(file, file_lock);
d7e09d03
PT
2821
2822 if (rc2 && file_lock->fl_type != F_UNLCK) {
2823 einfo.ei_mode = LCK_NL;
70a251f6
JH
2824 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2825 &lockh, flags);
d7e09d03
PT
2826 rc = rc2;
2827 }
2828
2829 ll_finish_md_op_data(op_data);
2830
0a3bdb00 2831 return rc;
d7e09d03
PT
2832}
2833
1d82425f 2834int ll_get_fid_by_name(struct inode *parent, const char *name,
2835 int namelen, struct lu_fid *fid)
79496845 2836{
2837 struct md_op_data *op_data = NULL;
2838 struct ptlrpc_request *req;
2839 struct mdt_body *body;
2840 int rc;
2841
2842 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2843 LUSTRE_OPC_ANY, NULL);
2844 if (IS_ERR(op_data))
2845 return PTR_ERR(op_data);
2846
2847 op_data->op_valid = OBD_MD_FLID;
2848 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
1d82425f 2849 ll_finish_md_op_data(op_data);
79496845 2850 if (rc < 0)
1d82425f 2851 return rc;
79496845 2852
2853 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2854 if (!body) {
2855 rc = -EFAULT;
2856 goto out_req;
2857 }
1d82425f 2858 if (fid)
2e1b5b8b 2859 *fid = body->mbo_fid1;
79496845 2860out_req:
2861 ptlrpc_req_finished(req);
79496845 2862 return rc;
2863}
2864
2865int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2866 const char *name, int namelen)
2867{
2868 struct ptlrpc_request *request = NULL;
1d82425f 2869 struct inode *child_inode = NULL;
79496845 2870 struct dentry *dchild = NULL;
2871 struct md_op_data *op_data;
2872 struct qstr qstr;
2873 int rc;
2874
1d82425f 2875 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%d\n",
79496845 2876 name, PFID(ll_inode2fid(parent)), mdtidx);
2877
2878 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2879 0, LUSTRE_OPC_ANY, NULL);
2880 if (IS_ERR(op_data))
2881 return PTR_ERR(op_data);
2882
2883 /* Get child FID first */
2884 qstr.hash = full_name_hash(parent, name, namelen);
2885 qstr.name = name;
2886 qstr.len = namelen;
2887 dchild = d_lookup(file_dentry(file), &qstr);
2888 if (dchild && dchild->d_inode) {
2889 op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
1d82425f 2890 if (dchild->d_inode) {
2891 child_inode = igrab(dchild->d_inode);
2892 ll_invalidate_aliases(child_inode);
2893 }
2894 dput(dchild);
79496845 2895 } else {
1d82425f 2896 rc = ll_get_fid_by_name(parent, name, namelen,
79496845 2897 &op_data->op_fid3);
2898 if (rc)
2899 goto out_free;
2900 }
2901
2902 if (!fid_is_sane(&op_data->op_fid3)) {
2903 CERROR("%s: migrate %s, but fid "DFID" is insane\n",
2904 ll_get_fsname(parent->i_sb, NULL, 0), name,
2905 PFID(&op_data->op_fid3));
1d82425f 2906 rc = -EINVAL;
79496845 2907 goto out_free;
2908 }
2909
2910 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2911 if (rc < 0)
2912 goto out_free;
2913
2914 if (rc == mdtidx) {
2915 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2916 PFID(&op_data->op_fid3), mdtidx);
2917 rc = 0;
2918 goto out_free;
2919 }
2920
2921 op_data->op_mds = mdtidx;
2922 op_data->op_cli_flags = CLI_MIGRATE;
2923 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
1d82425f 2924 namelen, name, namelen, &request);
79496845 2925 if (!rc)
2926 ll_update_times(request, parent);
2927
2928 ptlrpc_req_finished(request);
2929
2930out_free:
1d82425f 2931 if (child_inode) {
2932 clear_nlink(child_inode);
2933 iput(child_inode);
79496845 2934 }
2935
2936 ll_finish_md_op_data(op_data);
2937 return rc;
2938}
2939
/*
 * Lock handler installed as both .flock and .lock in
 * ll_file_operations_noflock: every request is refused with -ENOSYS and
 * none of the arguments are examined.
 */
static int ll_file_noflock(struct file *file, int cmd,
			   struct file_lock *file_lock)
{
	return -ENOSYS;
}
2945
2946/**
2947 * test if some locks matching bits and l_req_mode are acquired
2948 * - bits can be in different locks
2949 * - if found clear the common lock bits in *bits
2950 * - the bits not found, are kept in *bits
2951 * \param inode [IN]
2952 * \param bits [IN] searched lock bits [IN]
2953 * \param l_req_mode [IN] searched lock mode
2954 * \retval boolean, true iff all bits are found
2955 */
52ee0d20
OD
2956int ll_have_md_lock(struct inode *inode, __u64 *bits,
2957 enum ldlm_mode l_req_mode)
d7e09d03
PT
2958{
2959 struct lustre_handle lockh;
2960 ldlm_policy_data_t policy;
52ee0d20 2961 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
cd94f231 2962 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
d7e09d03
PT
2963 struct lu_fid *fid;
2964 __u64 flags;
2965 int i;
d7e09d03
PT
2966
2967 if (!inode)
ef075edc 2968 return 0;
d7e09d03
PT
2969
2970 fid = &ll_i2info(inode)->lli_fid;
2971 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2972 ldlm_lockname[mode]);
2973
2974 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
1253b2e8 2975 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
d7e09d03
PT
2976 policy.l_inodebits.bits = *bits & (1 << i);
2977 if (policy.l_inodebits.bits == 0)
2978 continue;
2979
2980 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2981 &policy, mode, &lockh)) {
2982 struct ldlm_lock *lock;
2983
2984 lock = ldlm_handle2lock(&lockh);
2985 if (lock) {
2986 *bits &=
2987 ~(lock->l_policy_data.l_inodebits.bits);
2988 LDLM_LOCK_PUT(lock);
2989 } else {
2990 *bits &= ~policy.l_inodebits.bits;
2991 }
2992 }
2993 }
0a3bdb00 2994 return *bits == 0;
d7e09d03
PT
2995}
2996
52ee0d20
OD
2997enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
2998 struct lustre_handle *lockh, __u64 flags,
2999 enum ldlm_mode mode)
d7e09d03 3000{
57303e76 3001 ldlm_policy_data_t policy = { .l_inodebits = {bits} };
d7e09d03 3002 struct lu_fid *fid;
52ee0d20 3003 enum ldlm_mode rc;
d7e09d03
PT
3004
3005 fid = &ll_i2info(inode)->lli_fid;
3006 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3007
1f6eaf83 3008 rc = md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
7fc1f831
AP
3009 fid, LDLM_IBITS, &policy, mode, lockh);
3010
0a3bdb00 3011 return rc;
d7e09d03
PT
3012}
3013
3014static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3015{
3016 /* Already unlinked. Just update nlink and return success */
3017 if (rc == -ENOENT) {
3018 clear_nlink(inode);
3019 /* This path cannot be hit for regular files unless in
bef31c78
MI
3020 * case of obscure races, so no need to validate size.
3021 */
d7e09d03
PT
3022 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3023 return 0;
3024 } else if (rc != 0) {
e49634bb
AD
3025 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3026 "%s: revalidate FID "DFID" error: rc = %d\n",
3027 ll_get_fsname(inode->i_sb, NULL, 0),
3028 PFID(ll_inode2fid(inode)), rc);
d7e09d03
PT
3029 }
3030
3031 return rc;
3032}
3033
2d95f10e 3034static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
d7e09d03 3035{
2b0143b5 3036 struct inode *inode = d_inode(dentry);
d7e09d03
PT
3037 struct ptlrpc_request *req = NULL;
3038 struct obd_export *exp;
3039 int rc = 0;
d7e09d03 3040
97a075cd
JN
3041 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n",
3042 PFID(ll_inode2fid(inode)), inode, dentry);
d7e09d03
PT
3043
3044 exp = ll_i2mdexp(inode);
3045
3046 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3047 * But under CMD case, it caused some lock issues, should be fixed
c0894c6c
OD
3048 * with new CMD ibits lock. See bug 12718
3049 */
d7e09d03
PT
3050 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3051 struct lookup_intent oit = { .it_op = IT_GETATTR };
3052 struct md_op_data *op_data;
3053
3054 if (ibits == MDS_INODELOCK_LOOKUP)
3055 oit.it_op = IT_LOOKUP;
3056
3057 /* Call getattr by fid, so do not provide name at all. */
dbca51dd
AV
3058 op_data = ll_prep_md_op_data(NULL, inode,
3059 inode, NULL, 0, 0,
d7e09d03
PT
3060 LUSTRE_OPC_ANY, NULL);
3061 if (IS_ERR(op_data))
0a3bdb00 3062 return PTR_ERR(op_data);
d7e09d03 3063
70a251f6
JH
3064 rc = md_intent_lock(exp, op_data, &oit, &req,
3065 &ll_md_blocking_ast, 0);
d7e09d03 3066 ll_finish_md_op_data(op_data);
d7e09d03
PT
3067 if (rc < 0) {
3068 rc = ll_inode_revalidate_fini(inode, rc);
34e1f2bb 3069 goto out;
d7e09d03
PT
3070 }
3071
dbca51dd 3072 rc = ll_revalidate_it_finish(req, &oit, inode);
d7e09d03
PT
3073 if (rc != 0) {
3074 ll_intent_release(&oit);
34e1f2bb 3075 goto out;
d7e09d03
PT
3076 }
3077
3078 /* Unlinked? Unhash dentry, so it is not picked up later by
c0894c6c
OD
3079 * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3080 * here to preserve get_cwd functionality on 2.6.
3081 * Bug 10503
3082 */
c9cc8d0f
BF
3083 if (!d_inode(dentry)->i_nlink) {
3084 spin_lock(&inode->i_lock);
b1d2a127 3085 d_lustre_invalidate(dentry, 0);
c9cc8d0f
BF
3086 spin_unlock(&inode->i_lock);
3087 }
d7e09d03 3088
dbca51dd 3089 ll_lookup_finish_locks(&oit, inode);
2b0143b5
DH
3090 } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
3091 struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
21aef7d9 3092 u64 valid = OBD_MD_FLGETATTR;
d7e09d03
PT
3093 struct md_op_data *op_data;
3094 int ealen = 0;
3095
3096 if (S_ISREG(inode->i_mode)) {
44779340 3097 rc = ll_get_default_mdsize(sbi, &ealen);
d7e09d03 3098 if (rc)
0a3bdb00 3099 return rc;
d7e09d03
PT
3100 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3101 }
3102
3103 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3104 0, ealen, LUSTRE_OPC_ANY,
3105 NULL);
3106 if (IS_ERR(op_data))
0a3bdb00 3107 return PTR_ERR(op_data);
d7e09d03
PT
3108
3109 op_data->op_valid = valid;
d7e09d03
PT
3110 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3111 ll_finish_md_op_data(op_data);
3112 if (rc) {
3113 rc = ll_inode_revalidate_fini(inode, rc);
0a3bdb00 3114 return rc;
d7e09d03
PT
3115 }
3116
3117 rc = ll_prep_inode(&inode, req, NULL, NULL);
3118 }
3119out:
3120 ptlrpc_req_finished(req);
3121 return rc;
3122}
3123
2de35386 3124static int ll_merge_md_attr(struct inode *inode)
3125{
3126 struct cl_attr attr = { 0 };
3127 int rc;
3128
3129 LASSERT(ll_i2info(inode)->lli_lsm_md);
3130 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3131 &attr);
3132 if (rc)
3133 return rc;
3134
3135 ll_i2info(inode)->lli_stripe_dir_size = attr.cat_size;
3136 ll_i2info(inode)->lli_stripe_dir_nlink = attr.cat_nlink;
3137
3138 ll_i2info(inode)->lli_atime = attr.cat_atime;
3139 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3140 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
3141
3142 return 0;
3143}
3144
2d95f10e 3145static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
d7e09d03 3146{
2b0143b5 3147 struct inode *inode = d_inode(dentry);
d7e09d03 3148 int rc;
d7e09d03 3149
2d95f10e 3150 rc = __ll_inode_revalidate(dentry, ibits);
d7e09d03 3151 if (rc != 0)
0a3bdb00 3152 return rc;
d7e09d03
PT
3153
3154 /* if object isn't regular file, don't validate size */
3155 if (!S_ISREG(inode->i_mode)) {
2de35386 3156 if (S_ISDIR(inode->i_mode) &&
3157 ll_i2info(inode)->lli_lsm_md) {
3158 rc = ll_merge_md_attr(inode);
3159 if (rc)
3160 return rc;
3161 }
3162
d2995737
JH
3163 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3164 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3165 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
d7e09d03 3166 } else {
5ea17d6c
JL
3167 /* In case of restore, the MDT has the right size and has
3168 * already send it back without granting the layout lock,
3169 * inode is up-to-date so glimpse is useless.
3170 * Also to glimpse we need the layout, in case of a running
3171 * restore the MDT holds the layout lock so the glimpse will
3172 * block up to the end of restore (getattr will block)
3173 */
3174 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3175 rc = ll_glimpse_size(inode);
d7e09d03 3176 }
0a3bdb00 3177 return rc;
d7e09d03
PT
3178}
3179
2d95f10e 3180int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
d7e09d03 3181{
2b0143b5 3182 struct inode *inode = d_inode(de);
d7e09d03
PT
3183 struct ll_sb_info *sbi = ll_i2sbi(inode);
3184 struct ll_inode_info *lli = ll_i2info(inode);
f82ced5d 3185 int res;
d7e09d03 3186
2d95f10e
JH
3187 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3188 MDS_INODELOCK_LOOKUP);
d7e09d03
PT
3189 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3190
3191 if (res)
3192 return res;
3193
3da76276
AP
3194 OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
3195
d7e09d03
PT
3196 stat->dev = inode->i_sb->s_dev;
3197 if (ll_need_32bit_api(sbi))
3198 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3199 else
3200 stat->ino = inode->i_ino;
3201 stat->mode = inode->i_mode;
d7e09d03
PT
3202 stat->uid = inode->i_uid;
3203 stat->gid = inode->i_gid;
3204 stat->rdev = inode->i_rdev;
3205 stat->atime = inode->i_atime;
3206 stat->mtime = inode->i_mtime;
3207 stat->ctime = inode->i_ctime;
3208 stat->blksize = 1 << inode->i_blkbits;
d7e09d03
PT
3209 stat->blocks = inode->i_blocks;
3210
2de35386 3211 if (S_ISDIR(inode->i_mode) &&
3212 ll_i2info(inode)->lli_lsm_md) {
3213 stat->nlink = lli->lli_stripe_dir_nlink;
3214 stat->size = lli->lli_stripe_dir_size;
3215 } else {
3216 stat->nlink = inode->i_nlink;
3217 stat->size = i_size_read(inode);
3218 }
3219
d7e09d03
PT
3220 return 0;
3221}
d7e09d03 3222
2d95f10e
JH
3223static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3224 __u64 start, __u64 len)
89580e37
PT
3225{
3226 int rc;
3227 size_t num_bytes;
3228 struct ll_user_fiemap *fiemap;
3229 unsigned int extent_count = fieinfo->fi_extents_max;
3230
3231 num_bytes = sizeof(*fiemap) + (extent_count *
3232 sizeof(struct ll_fiemap_extent));
e958f49b 3233 fiemap = libcfs_kvzalloc(num_bytes, GFP_NOFS);
89580e37 3234
6e16818b 3235 if (!fiemap)
89580e37
PT
3236 return -ENOMEM;
3237
3238 fiemap->fm_flags = fieinfo->fi_flags;
3239 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3240 fiemap->fm_start = start;
3241 fiemap->fm_length = len;
97514241
OD
3242 if (extent_count > 0 &&
3243 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3244 sizeof(struct ll_fiemap_extent)) != 0) {
3245 rc = -EFAULT;
3246 goto out;
3247 }
89580e37
PT
3248
3249 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3250
3251 fieinfo->fi_flags = fiemap->fm_flags;
3252 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
97514241
OD
3253 if (extent_count > 0 &&
3254 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3255 fiemap->fm_mapped_extents *
3256 sizeof(struct ll_fiemap_extent)) != 0) {
3257 rc = -EFAULT;
3258 goto out;
3259 }
89580e37 3260
97514241 3261out:
e958f49b 3262 kvfree(fiemap);
89580e37
PT
3263 return rc;
3264}
d7e09d03 3265
2d95f10e 3266struct posix_acl *ll_get_acl(struct inode *inode, int type)
d7e09d03
PT
3267{
3268 struct ll_inode_info *lli = ll_i2info(inode);
3269 struct posix_acl *acl = NULL;
d7e09d03
PT
3270
3271 spin_lock(&lli->lli_lock);
3272 /* VFS' acl_permission_check->check_acl will release the refcount */
3273 acl = posix_acl_dup(lli->lli_posix_acl);
ed7bdf5c 3274#ifdef CONFIG_FS_POSIX_ACL
b788dc51 3275 forget_cached_acl(inode, type);
ed7bdf5c 3276#endif
d7e09d03
PT
3277 spin_unlock(&lli->lli_lock);
3278
0a3bdb00 3279 return acl;
d7e09d03
PT
3280}
3281
d7e09d03
PT
3282int ll_inode_permission(struct inode *inode, int mask)
3283{
c948390f
GP
3284 struct ll_sb_info *sbi;
3285 struct root_squash_info *squash;
3286 const struct cred *old_cred = NULL;
3287 struct cred *cred = NULL;
3288 bool squash_id = false;
3289 cfs_cap_t cap;
d7e09d03 3290 int rc = 0;
d7e09d03 3291
d7e09d03
PT
3292 if (mask & MAY_NOT_BLOCK)
3293 return -ECHILD;
d7e09d03
PT
3294
3295 /* as root inode are NOT getting validated in lookup operation,
c0894c6c
OD
3296 * need to do it before permission check.
3297 */
d7e09d03 3298
f76c23da 3299 if (is_root_inode(inode)) {
2d95f10e
JH
3300 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3301 MDS_INODELOCK_LOOKUP);
d7e09d03 3302 if (rc)
0a3bdb00 3303 return rc;
d7e09d03
PT
3304 }
3305
97a075cd
JN
3306 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3307 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
d7e09d03 3308
c948390f
GP
3309 /* squash fsuid/fsgid if needed */
3310 sbi = ll_i2sbi(inode);
3311 squash = &sbi->ll_squash;
3312 if (unlikely(squash->rsi_uid &&
3313 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3314 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3315 squash_id = true;
3316 }
3317
3318 if (squash_id) {
3319 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3320 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3321 squash->rsi_uid, squash->rsi_gid);
3322
3323 /*
3324 * update current process's credentials
3325 * and FS capability
3326 */
3327 cred = prepare_creds();
3328 if (!cred)
3329 return -ENOMEM;
3330
3331 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3332 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
3333 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3334 if ((1 << cap) & CFS_CAP_FS_MASK)
3335 cap_lower(cred->cap_effective, cap);
3336 }
3337 old_cred = override_creds(cred);
3338 }
3339
d7e09d03 3340 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
8707c96e 3341 rc = generic_permission(inode, mask);
d7e09d03 3342
c948390f
GP
3343 /* restore current process's credentials and FS capability */
3344 if (squash_id) {
3345 revert_creds(old_cred);
3346 put_cred(cred);
3347 }
3348
0a3bdb00 3349 return rc;
d7e09d03
PT
3350}
3351
d7e09d03
PT
3352/* -o localflock - only provides locally consistent flock locks */
3353struct file_operations ll_file_operations = {
b42b15fd 3354 .read_iter = ll_file_read_iter,
b42b15fd 3355 .write_iter = ll_file_write_iter,
d7e09d03
PT
3356 .unlocked_ioctl = ll_file_ioctl,
3357 .open = ll_file_open,
3358 .release = ll_file_release,
3359 .mmap = ll_file_mmap,
3360 .llseek = ll_file_seek,
3361 .splice_read = ll_file_splice_read,
3362 .fsync = ll_fsync,
3363 .flush = ll_flush
3364};
3365
3366struct file_operations ll_file_operations_flock = {
b42b15fd 3367 .read_iter = ll_file_read_iter,
b42b15fd 3368 .write_iter = ll_file_write_iter,
d7e09d03
PT
3369 .unlocked_ioctl = ll_file_ioctl,
3370 .open = ll_file_open,
3371 .release = ll_file_release,
3372 .mmap = ll_file_mmap,
3373 .llseek = ll_file_seek,
3374 .splice_read = ll_file_splice_read,
3375 .fsync = ll_fsync,
3376 .flush = ll_flush,
3377 .flock = ll_file_flock,
3378 .lock = ll_file_flock
3379};
3380
3381/* These are for -o noflock - to return ENOSYS on flock calls */
3382struct file_operations ll_file_operations_noflock = {
b42b15fd 3383 .read_iter = ll_file_read_iter,
b42b15fd 3384 .write_iter = ll_file_write_iter,
d7e09d03
PT
3385 .unlocked_ioctl = ll_file_ioctl,
3386 .open = ll_file_open,
3387 .release = ll_file_release,
3388 .mmap = ll_file_mmap,
3389 .llseek = ll_file_seek,
3390 .splice_read = ll_file_splice_read,
3391 .fsync = ll_fsync,
3392 .flush = ll_flush,
3393 .flock = ll_file_noflock,
3394 .lock = ll_file_noflock
3395};
3396
d2d32738 3397const struct inode_operations ll_file_inode_operations = {
d7e09d03
PT
3398 .setattr = ll_setattr,
3399 .getattr = ll_getattr,
3400 .permission = ll_inode_permission,
2c563880
JS
3401 .setxattr = generic_setxattr,
3402 .getxattr = generic_getxattr,
d7e09d03 3403 .listxattr = ll_listxattr,
2c563880 3404 .removexattr = generic_removexattr,
89580e37 3405 .fiemap = ll_fiemap,
d7e09d03
PT
3406 .get_acl = ll_get_acl,
3407};
3408
d0a0acc3 3409/* dynamic ioctl number support routines */
d7e09d03
PT
3410static struct llioc_ctl_data {
3411 struct rw_semaphore ioc_sem;
3412 struct list_head ioc_head;
3413} llioc = {
3414 __RWSEM_INITIALIZER(llioc.ioc_sem),
3415 LIST_HEAD_INIT(llioc.ioc_head)
3416};
3417
d7e09d03
PT
3418struct llioc_data {
3419 struct list_head iocd_list;
3420 unsigned int iocd_size;
3421 llioc_callback_t iocd_cb;
3422 unsigned int iocd_count;
3423 unsigned int iocd_cmd[0];
3424};
3425
3426void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3427{
3428 unsigned int size;
3429 struct llioc_data *in_data = NULL;
d7e09d03 3430
6e16818b 3431 if (!cb || !cmd || count > LLIOC_MAX_CMD || count < 0)
0a3bdb00 3432 return NULL;
d7e09d03
PT
3433
3434 size = sizeof(*in_data) + count * sizeof(unsigned int);
496a51bd
JL
3435 in_data = kzalloc(size, GFP_NOFS);
3436 if (!in_data)
0a3bdb00 3437 return NULL;
d7e09d03 3438
d7e09d03
PT
3439 in_data->iocd_size = size;
3440 in_data->iocd_cb = cb;
3441 in_data->iocd_count = count;
3442 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3443
3444 down_write(&llioc.ioc_sem);
3445 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3446 up_write(&llioc.ioc_sem);
3447
0a3bdb00 3448 return in_data;
d7e09d03 3449}
93133eb4 3450EXPORT_SYMBOL(ll_iocontrol_register);
d7e09d03
PT
3451
3452void ll_iocontrol_unregister(void *magic)
3453{
3454 struct llioc_data *tmp;
3455
6e16818b 3456 if (!magic)
d7e09d03
PT
3457 return;
3458
3459 down_write(&llioc.ioc_sem);
3460 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3461 if (tmp == magic) {
d7e09d03
PT
3462 list_del(&tmp->iocd_list);
3463 up_write(&llioc.ioc_sem);
3464
97903a26 3465 kfree(tmp);
d7e09d03
PT
3466 return;
3467 }
3468 }
3469 up_write(&llioc.ioc_sem);
3470
3471 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3472}
d7e09d03
PT
3473EXPORT_SYMBOL(ll_iocontrol_unregister);
3474
2d95f10e
JH
3475static enum llioc_iter
3476ll_iocontrol_call(struct inode *inode, struct file *file,
3477 unsigned int cmd, unsigned long arg, int *rcp)
d7e09d03
PT
3478{
3479 enum llioc_iter ret = LLIOC_CONT;
3480 struct llioc_data *data;
3481 int rc = -EINVAL, i;
3482
3483 down_read(&llioc.ioc_sem);
3484 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3485 for (i = 0; i < data->iocd_count; i++) {
3486 if (cmd != data->iocd_cmd[i])
3487 continue;
3488
3489 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3490 break;
3491 }
3492
3493 if (ret == LLIOC_STOP)
3494 break;
3495 }
3496 up_read(&llioc.ioc_sem);
3497
3498 if (rcp)
3499 *rcp = rc;
3500 return ret;
3501}
3502
3503int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3504{
3505 struct ll_inode_info *lli = ll_i2info(inode);
3506 struct cl_env_nest nest;
3507 struct lu_env *env;
3508 int result;
d7e09d03 3509
6e16818b 3510 if (!lli->lli_clob)
0a3bdb00 3511 return 0;
d7e09d03
PT
3512
3513 env = cl_env_nested_get(&nest);
3514 if (IS_ERR(env))
0a3bdb00 3515 return PTR_ERR(env);
d7e09d03
PT
3516
3517 result = cl_conf_set(env, lli->lli_clob, conf);
3518 cl_env_nested_put(&nest, env);
3519
3520 if (conf->coc_opc == OBJECT_CONF_SET) {
3521 struct ldlm_lock *lock = conf->coc_lock;
3522
6e16818b 3523 LASSERT(lock);
d7e09d03
PT
3524 LASSERT(ldlm_has_layout(lock));
3525 if (result == 0) {
3526 /* it can only be allowed to match after layout is
3527 * applied to inode otherwise false layout would be
d0a0acc3 3528 * seen. Applying layout should happen before dropping
c0894c6c
OD
3529 * the intent lock.
3530 */
d7e09d03
PT
3531 ldlm_lock_allow_match(lock);
3532 }
3533 }
0a3bdb00 3534 return result;
d7e09d03
PT
3535}
3536
3537/* Fetch layout from MDT with getxattr request, if it's not ready yet */
3538static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3539
3540{
3541 struct ll_sb_info *sbi = ll_i2sbi(inode);
d7e09d03
PT
3542 struct ptlrpc_request *req;
3543 struct mdt_body *body;
3544 void *lvbdata;
3545 void *lmm;
3546 int lmmsize;
3547 int rc;
d7e09d03 3548
e2335e5d 3549 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
5a9a80ba 3550 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
e2335e5d 3551 lock->l_lvb_data, lock->l_lvb_len);
3552
5a9a80ba 3553 if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
0a3bdb00 3554 return 0;
d7e09d03
PT
3555
3556 /* if layout lock was granted right away, the layout is returned
3557 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3558 * blocked and then granted via completion ast, we have to fetch
3559 * layout here. Please note that we can't use the LVB buffer in
c0894c6c
OD
3560 * completion AST because it doesn't have a large enough buffer
3561 */
44779340 3562 rc = ll_get_default_mdsize(sbi, &lmmsize);
d7e09d03 3563 if (rc == 0)
ef2e0f55
OD
3564 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3565 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3566 lmmsize, 0, &req);
d7e09d03 3567 if (rc < 0)
0a3bdb00 3568 return rc;
d7e09d03
PT
3569
3570 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
6e16818b 3571 if (!body) {
34e1f2bb
JL
3572 rc = -EPROTO;
3573 goto out;
3574 }
d7e09d03 3575
2e1b5b8b 3576 lmmsize = body->mbo_eadatasize;
34e1f2bb
JL
3577 if (lmmsize == 0) /* empty layout */ {
3578 rc = 0;
3579 goto out;
3580 }
d7e09d03
PT
3581
3582 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
6e16818b 3583 if (!lmm) {
34e1f2bb
JL
3584 rc = -EFAULT;
3585 goto out;
3586 }
d7e09d03 3587
e958f49b 3588 lvbdata = libcfs_kvzalloc(lmmsize, GFP_NOFS);
6e16818b 3589 if (!lvbdata) {
34e1f2bb
JL
3590 rc = -ENOMEM;
3591 goto out;
3592 }
d7e09d03
PT
3593
3594 memcpy(lvbdata, lmm, lmmsize);
3595 lock_res_and_lock(lock);
6e16818b 3596 if (lock->l_lvb_data)
e958f49b 3597 kvfree(lock->l_lvb_data);
e2335e5d 3598
3599 lock->l_lvb_data = lvbdata;
3600 lock->l_lvb_len = lmmsize;
d7e09d03
PT
3601 unlock_res_and_lock(lock);
3602
d7e09d03
PT
3603out:
3604 ptlrpc_req_finished(req);
3605 return rc;
3606}
3607
3608/**
3609 * Apply the layout to the inode. Layout lock is held and will be released
3610 * in this function.
3611 */
52ee0d20
OD
3612static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
3613 struct inode *inode, __u32 *gen, bool reconf)
d7e09d03
PT
3614{
3615 struct ll_inode_info *lli = ll_i2info(inode);
3616 struct ll_sb_info *sbi = ll_i2sbi(inode);
3617 struct ldlm_lock *lock;
3618 struct lustre_md md = { NULL };
3619 struct cl_object_conf conf;
3620 int rc = 0;
3621 bool lvb_ready;
3622 bool wait_layout = false;
d7e09d03
PT
3623
3624 LASSERT(lustre_handle_is_used(lockh));
3625
3626 lock = ldlm_handle2lock(lockh);
6e16818b 3627 LASSERT(lock);
d7e09d03
PT
3628 LASSERT(ldlm_has_layout(lock));
3629
97a075cd
JN
3630 LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d",
3631 PFID(&lli->lli_fid), inode, reconf);
d7e09d03 3632
bc969176 3633 /* in case this is a caching lock and reinstate with new inode */
bc30c172 3634 md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
bc969176 3635
d7e09d03 3636 lock_res_and_lock(lock);
5a9a80ba 3637 lvb_ready = ldlm_is_lvb_ready(lock);
d7e09d03
PT
3638 unlock_res_and_lock(lock);
3639 /* checking lvb_ready is racy but this is okay. The worst case is
c0894c6c
OD
3640 * that multi processes may configure the file on the same time.
3641 */
d7e09d03
PT
3642 if (lvb_ready || !reconf) {
3643 rc = -ENODATA;
3644 if (lvb_ready) {
3645 /* layout_gen must be valid if layout lock is not
c0894c6c
OD
3646 * cancelled and stripe has already set
3647 */
09aed8a5 3648 *gen = ll_layout_version_get(lli);
d7e09d03
PT
3649 rc = 0;
3650 }
34e1f2bb 3651 goto out;
d7e09d03
PT
3652 }
3653
3654 rc = ll_layout_fetch(inode, lock);
3655 if (rc < 0)
34e1f2bb 3656 goto out;
d7e09d03
PT
3657
3658 /* for layout lock, lmm is returned in lock's lvb.
3659 * lvb_data is immutable if the lock is held so it's safe to access it
3660 * without res lock. See the description in ldlm_lock_decref_internal()
c0894c6c
OD
3661 * for the condition to free lvb_data of layout lock
3662 */
6e16818b 3663 if (lock->l_lvb_data) {
d7e09d03
PT
3664 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3665 lock->l_lvb_data, lock->l_lvb_len);
3666 if (rc >= 0) {
3667 *gen = LL_LAYOUT_GEN_EMPTY;
6e16818b 3668 if (md.lsm)
d7e09d03
PT
3669 *gen = md.lsm->lsm_layout_gen;
3670 rc = 0;
3671 } else {
e15ba45d
OD
3672 CERROR("%s: file " DFID " unpackmd error: %d\n",
3673 ll_get_fsname(inode->i_sb, NULL, 0),
3674 PFID(&lli->lli_fid), rc);
d7e09d03
PT
3675 }
3676 }
3677 if (rc < 0)
34e1f2bb 3678 goto out;
d7e09d03
PT
3679
3680 /* set layout to file. Unlikely this will fail as old layout was
c0894c6c
OD
3681 * surely eliminated
3682 */
ec83e611 3683 memset(&conf, 0, sizeof(conf));
d7e09d03
PT
3684 conf.coc_opc = OBJECT_CONF_SET;
3685 conf.coc_inode = inode;
3686 conf.coc_lock = lock;
3687 conf.u.coc_md = &md;
3688 rc = ll_layout_conf(inode, &conf);
3689
6e16818b 3690 if (md.lsm)
d7e09d03
PT
3691 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3692
3693 /* refresh layout failed, need to wait */
3694 wait_layout = rc == -EBUSY;
d7e09d03
PT
3695
3696out:
3697 LDLM_LOCK_PUT(lock);
3698 ldlm_lock_decref(lockh, mode);
3699
3700 /* wait for IO to complete if it's still being used. */
3701 if (wait_layout) {
97a075cd 3702 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
e15ba45d 3703 ll_get_fsname(inode->i_sb, NULL, 0),
97a075cd 3704 PFID(&lli->lli_fid), inode);
d7e09d03 3705
ec83e611 3706 memset(&conf, 0, sizeof(conf));
d7e09d03
PT
3707 conf.coc_opc = OBJECT_CONF_WAIT;
3708 conf.coc_inode = inode;
3709 rc = ll_layout_conf(inode, &conf);
3710 if (rc == 0)
3711 rc = -EAGAIN;
3712
97a075cd
JN
3713 CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n",
3714 ll_get_fsname(inode->i_sb, NULL, 0),
e15ba45d 3715 PFID(&lli->lli_fid), rc);
d7e09d03 3716 }
0a3bdb00 3717 return rc;
d7e09d03
PT
3718}
3719
3720/**
3721 * This function checks if there exists a LAYOUT lock on the client side,
3722 * or enqueues it if it doesn't have one in cache.
3723 *
3724 * This function will not hold layout lock so it may be revoked any time after
3725 * this function returns. Any operations depend on layout should be redone
3726 * in that case.
3727 *
3728 * This function should be called before lov_io_init() to get an uptodate
3729 * layout version, the caller should save the version number and after IO
3730 * is finished, this function should be called again to verify that layout
3731 * is not changed during IO time.
3732 */
3733int ll_layout_refresh(struct inode *inode, __u32 *gen)
3734{
3735 struct ll_inode_info *lli = ll_i2info(inode);
3736 struct ll_sb_info *sbi = ll_i2sbi(inode);
3737 struct md_op_data *op_data;
3738 struct lookup_intent it;
3739 struct lustre_handle lockh;
52ee0d20 3740 enum ldlm_mode mode;
f2145eae
BK
3741 struct ldlm_enqueue_info einfo = {
3742 .ei_type = LDLM_IBITS,
3743 .ei_mode = LCK_CR,
70a251f6
JH
3744 .ei_cb_bl = &ll_md_blocking_ast,
3745 .ei_cb_cp = &ldlm_completion_ast,
f2145eae 3746 };
d7e09d03 3747 int rc;
d7e09d03 3748
09aed8a5
JX
3749 *gen = ll_layout_version_get(lli);
3750 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
0a3bdb00 3751 return 0;
d7e09d03
PT
3752
3753 /* sanity checks */
3754 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3755 LASSERT(S_ISREG(inode->i_mode));
3756
d7e09d03
PT
3757 /* take layout lock mutex to enqueue layout lock exclusively. */
3758 mutex_lock(&lli->lli_layout_mutex);
3759
3760again:
09aed8a5 3761 /* mostly layout lock is caching on the local side, so try to match
c0894c6c
OD
3762 * it before grabbing layout lock mutex.
3763 */
7fc1f831
AP
3764 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3765 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
d7e09d03
PT
3766 if (mode != 0) { /* hit cached lock */
3767 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3768 if (rc == -EAGAIN)
3769 goto again;
3770
3771 mutex_unlock(&lli->lli_layout_mutex);
0a3bdb00 3772 return rc;
d7e09d03
PT
3773 }
3774
3775 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
e15ba45d 3776 0, 0, LUSTRE_OPC_ANY, NULL);
d7e09d03
PT
3777 if (IS_ERR(op_data)) {
3778 mutex_unlock(&lli->lli_layout_mutex);
0a3bdb00 3779 return PTR_ERR(op_data);
d7e09d03
PT
3780 }
3781
3782 /* have to enqueue one */
3783 memset(&it, 0, sizeof(it));
3784 it.it_op = IT_LAYOUT;
3785 lockh.cookie = 0ULL;
3786
97a075cd
JN
3787 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3788 ll_get_fsname(inode->i_sb, NULL, 0),
3789 PFID(&lli->lli_fid), inode);
d7e09d03 3790
70a251f6 3791 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
8bf86fd9
JH
3792 ptlrpc_req_finished(it.it_request);
3793 it.it_request = NULL;
d7e09d03
PT
3794
3795 ll_finish_md_op_data(op_data);
3796
e476f2e5
JH
3797 mode = it.it_lock_mode;
3798 it.it_lock_mode = 0;
d7e09d03
PT
3799 ll_intent_drop_lock(&it);
3800
3801 if (rc == 0) {
3802 /* set lock data in case this is a new lock */
3803 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3804 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3805 if (rc == -EAGAIN)
3806 goto again;
3807 }
3808 mutex_unlock(&lli->lli_layout_mutex);
3809
0a3bdb00 3810 return rc;
d7e09d03 3811}
5ea17d6c
JL
3812
3813/**
3814 * This function send a restore request to the MDT
3815 */
1b1594da 3816int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
5ea17d6c
JL
3817{
3818 struct hsm_user_request *hur;
3819 int len, rc;
3820
3821 len = sizeof(struct hsm_user_request) +
3822 sizeof(struct hsm_user_item);
496a51bd
JL
3823 hur = kzalloc(len, GFP_NOFS);
3824 if (!hur)
5ea17d6c
JL
3825 return -ENOMEM;
3826
3827 hur->hur_request.hr_action = HUA_RESTORE;
3828 hur->hur_request.hr_archive_id = 0;
3829 hur->hur_request.hr_flags = 0;
3830 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3831 sizeof(hur->hur_user_item[0].hui_fid));
1b1594da
JX
3832 hur->hur_user_item[0].hui_extent.offset = offset;
3833 hur->hur_user_item[0].hui_extent.length = length;
5ea17d6c 3834 hur->hur_request.hr_itemcount = 1;
1929c433 3835 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
5ea17d6c 3836 len, hur, NULL);
97903a26 3837 kfree(hur);
5ea17d6c
JL
3838 return rc;
3839}
This page took 0.873764 seconds and 5 git commands to generate.