4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/llite/llite_lib.c
38 * Lustre Light Super operations
41 #define DEBUG_SUBSYSTEM S_LLITE
43 #include <linux/module.h>
44 #include <linux/statfs.h>
45 #include <linux/types.h>
48 #include "../include/lustre_lite.h"
49 #include "../include/lustre_ha.h"
50 #include "../include/lustre_dlm.h"
51 #include "../include/lprocfs_status.h"
52 #include "../include/lustre_disk.h"
53 #include "../include/lustre_param.h"
54 #include "../include/lustre_log.h"
55 #include "../include/cl_object.h"
56 #include "../include/obd_cksum.h"
57 #include "llite_internal.h"
59 struct kmem_cache
*ll_file_data_slab
;
60 struct dentry
*llite_root
;
61 struct kset
*llite_kset
;
64 #define log2(n) ffz(~(n))
67 static struct ll_sb_info
*ll_init_sbi(struct super_block
*sb
)
69 struct ll_sb_info
*sbi
= NULL
;
71 unsigned long lru_page_max
;
76 sbi
= kzalloc(sizeof(*sbi
), GFP_NOFS
);
80 spin_lock_init(&sbi
->ll_lock
);
81 mutex_init(&sbi
->ll_lco
.lco_lock
);
82 spin_lock_init(&sbi
->ll_pp_extent_lock
);
83 spin_lock_init(&sbi
->ll_process_lock
);
84 sbi
->ll_rw_stats_on
= 0;
87 pages
= si
.totalram
- si
.totalhigh
;
88 if (pages
>> (20 - PAGE_CACHE_SHIFT
) < 512)
89 lru_page_max
= pages
/ 2;
91 lru_page_max
= (pages
/ 4) * 3;
93 /* initialize lru data */
94 atomic_set(&sbi
->ll_cache
.ccc_users
, 0);
95 sbi
->ll_cache
.ccc_lru_max
= lru_page_max
;
96 atomic_set(&sbi
->ll_cache
.ccc_lru_left
, lru_page_max
);
97 spin_lock_init(&sbi
->ll_cache
.ccc_lru_lock
);
98 INIT_LIST_HEAD(&sbi
->ll_cache
.ccc_lru
);
100 sbi
->ll_ra_info
.ra_max_pages_per_file
= min(pages
/ 32,
101 SBI_DEFAULT_READAHEAD_MAX
);
102 sbi
->ll_ra_info
.ra_max_pages
= sbi
->ll_ra_info
.ra_max_pages_per_file
;
103 sbi
->ll_ra_info
.ra_max_read_ahead_whole_pages
=
104 SBI_DEFAULT_READAHEAD_WHOLE_MAX
;
106 ll_generate_random_uuid(uuid
);
107 class_uuid_unparse(uuid
, &sbi
->ll_sb_uuid
);
108 CDEBUG(D_CONFIG
, "generated uuid: %s\n", sbi
->ll_sb_uuid
.uuid
);
110 sbi
->ll_flags
|= LL_SBI_VERBOSE
;
111 sbi
->ll_flags
|= LL_SBI_CHECKSUM
;
113 sbi
->ll_flags
|= LL_SBI_LRU_RESIZE
;
115 for (i
= 0; i
<= LL_PROCESS_HIST_MAX
; i
++) {
116 spin_lock_init(&sbi
->ll_rw_extents_info
.pp_extents
[i
].
118 spin_lock_init(&sbi
->ll_rw_extents_info
.pp_extents
[i
].
122 /* metadata statahead is enabled by default */
123 sbi
->ll_sa_max
= LL_SA_RPC_DEF
;
124 atomic_set(&sbi
->ll_sa_total
, 0);
125 atomic_set(&sbi
->ll_sa_wrong
, 0);
126 atomic_set(&sbi
->ll_agl_total
, 0);
127 sbi
->ll_flags
|= LL_SBI_AGL_ENABLED
;
134 static void ll_free_sbi(struct super_block
*sb
)
136 struct ll_sb_info
*sbi
= ll_s2sbi(sb
);
141 static int client_common_fill_super(struct super_block
*sb
, char *md
, char *dt
,
142 struct vfsmount
*mnt
)
144 struct inode
*root
= NULL
;
145 struct ll_sb_info
*sbi
= ll_s2sbi(sb
);
146 struct obd_device
*obd
;
147 struct obd_statfs
*osfs
= NULL
;
148 struct ptlrpc_request
*request
= NULL
;
149 struct obd_connect_data
*data
= NULL
;
150 struct obd_uuid
*uuid
;
151 struct md_op_data
*op_data
;
152 struct lustre_md lmd
;
154 int size
, err
, checksum
;
156 obd
= class_name2obd(md
);
158 CERROR("MD %s: not setup or attached\n", md
);
162 data
= kzalloc(sizeof(*data
), GFP_NOFS
);
166 osfs
= kzalloc(sizeof(*osfs
), GFP_NOFS
);
173 err
= ldebugfs_register_mountpoint(llite_root
, sb
, dt
, md
);
175 CERROR("could not register mount in <debugfs>/lustre/llite\n");
178 /* indicate the features supported by this client */
179 data
->ocd_connect_flags
= OBD_CONNECT_IBITS
| OBD_CONNECT_NODEVOH
|
180 OBD_CONNECT_ATTRFID
|
181 OBD_CONNECT_VERSION
| OBD_CONNECT_BRW_SIZE
|
182 OBD_CONNECT_CANCELSET
| OBD_CONNECT_FID
|
183 OBD_CONNECT_AT
| OBD_CONNECT_LOV_V3
|
184 OBD_CONNECT_RMT_CLIENT
| OBD_CONNECT_VBR
|
185 OBD_CONNECT_FULL20
| OBD_CONNECT_64BITHASH
|
186 OBD_CONNECT_EINPROGRESS
|
187 OBD_CONNECT_JOBSTATS
| OBD_CONNECT_LVB_TYPE
|
188 OBD_CONNECT_LAYOUTLOCK
|
189 OBD_CONNECT_PINGLESS
|
190 OBD_CONNECT_MAX_EASIZE
|
191 OBD_CONNECT_FLOCK_DEAD
|
192 OBD_CONNECT_DISP_STRIPE
;
194 if (sbi
->ll_flags
& LL_SBI_SOM_PREVIEW
)
195 data
->ocd_connect_flags
|= OBD_CONNECT_SOM
;
197 if (sbi
->ll_flags
& LL_SBI_LRU_RESIZE
)
198 data
->ocd_connect_flags
|= OBD_CONNECT_LRU_RESIZE
;
199 #ifdef CONFIG_FS_POSIX_ACL
200 data
->ocd_connect_flags
|= OBD_CONNECT_ACL
| OBD_CONNECT_UMASK
;
203 if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT
))
204 /* flag mdc connection as lightweight, only used for test
205 * purpose, use with care
207 data
->ocd_connect_flags
|= OBD_CONNECT_LIGHTWEIGHT
;
209 data
->ocd_ibits_known
= MDS_INODELOCK_FULL
;
210 data
->ocd_version
= LUSTRE_VERSION_CODE
;
212 if (sb
->s_flags
& MS_RDONLY
)
213 data
->ocd_connect_flags
|= OBD_CONNECT_RDONLY
;
214 if (sbi
->ll_flags
& LL_SBI_USER_XATTR
)
215 data
->ocd_connect_flags
|= OBD_CONNECT_XATTR
;
217 if (sbi
->ll_flags
& LL_SBI_FLOCK
)
218 sbi
->ll_fop
= &ll_file_operations_flock
;
219 else if (sbi
->ll_flags
& LL_SBI_LOCALFLOCK
)
220 sbi
->ll_fop
= &ll_file_operations
;
222 sbi
->ll_fop
= &ll_file_operations_noflock
;
225 data
->ocd_connect_flags
|= OBD_CONNECT_REAL
;
226 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
)
227 data
->ocd_connect_flags
|= OBD_CONNECT_RMT_CLIENT_FORCE
;
229 data
->ocd_brw_size
= MD_MAX_BRW_SIZE
;
231 err
= obd_connect(NULL
, &sbi
->ll_md_exp
, obd
, &sbi
->ll_sb_uuid
,
234 LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
238 CERROR("cannot connect to %s: rc = %d\n", md
, err
);
242 sbi
->ll_md_exp
->exp_connect_data
= *data
;
244 err
= obd_fid_init(sbi
->ll_md_exp
->exp_obd
, sbi
->ll_md_exp
,
245 LUSTRE_SEQ_METADATA
);
247 CERROR("%s: Can't init metadata layer FID infrastructure, rc = %d\n",
248 sbi
->ll_md_exp
->exp_obd
->obd_name
, err
);
252 /* For mount, we only need fs info from MDT0, and also in DNE, it
253 * can make sure the client can be mounted as long as MDT0 is
256 err
= obd_statfs(NULL
, sbi
->ll_md_exp
, osfs
,
257 cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS
),
258 OBD_STATFS_FOR_MDT0
);
262 /* This needs to be after statfs to ensure connect has finished.
263 * Note that "data" does NOT contain the valid connect reply.
264 * If connecting to a 1.8 server there will be no LMV device, so
265 * we can access the MDC export directly and exp_connect_flags will
266 * be non-zero, but if accessing an upgraded 2.1 server it will
267 * have the correct flags filled in.
268 * XXX: fill in the LMV exp_connect_flags from MDC(s).
270 valid
= exp_connect_flags(sbi
->ll_md_exp
) & CLIENT_CONNECT_MDT_REQD
;
271 if (exp_connect_flags(sbi
->ll_md_exp
) != 0 &&
272 valid
!= CLIENT_CONNECT_MDT_REQD
) {
275 buf
= kzalloc(PAGE_CACHE_SIZE
, GFP_KERNEL
);
280 obd_connect_flags2str(buf
, PAGE_CACHE_SIZE
,
281 valid
^ CLIENT_CONNECT_MDT_REQD
, ",");
282 LCONSOLE_ERROR_MSG(0x170, "Server %s does not support feature(s) needed for correct operation of this client (%s). Please upgrade server or downgrade client.\n",
283 sbi
->ll_md_exp
->exp_obd
->obd_name
, buf
);
289 size
= sizeof(*data
);
290 err
= obd_get_info(NULL
, sbi
->ll_md_exp
, sizeof(KEY_CONN_DATA
),
291 KEY_CONN_DATA
, &size
, data
, NULL
);
293 CERROR("%s: Get connect data failed: rc = %d\n",
294 sbi
->ll_md_exp
->exp_obd
->obd_name
, err
);
298 LASSERT(osfs
->os_bsize
);
299 sb
->s_blocksize
= osfs
->os_bsize
;
300 sb
->s_blocksize_bits
= log2(osfs
->os_bsize
);
301 sb
->s_magic
= LL_SUPER_MAGIC
;
302 sb
->s_maxbytes
= MAX_LFS_FILESIZE
;
303 sbi
->ll_namelen
= osfs
->os_namelen
;
305 if ((sbi
->ll_flags
& LL_SBI_USER_XATTR
) &&
306 !(data
->ocd_connect_flags
& OBD_CONNECT_XATTR
)) {
307 LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
308 sbi
->ll_flags
&= ~LL_SBI_USER_XATTR
;
311 if (data
->ocd_connect_flags
& OBD_CONNECT_ACL
) {
312 sb
->s_flags
|= MS_POSIXACL
;
313 sbi
->ll_flags
|= LL_SBI_ACL
;
315 LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
316 sb
->s_flags
&= ~MS_POSIXACL
;
317 sbi
->ll_flags
&= ~LL_SBI_ACL
;
320 if (data
->ocd_connect_flags
& OBD_CONNECT_RMT_CLIENT
) {
321 if (!(sbi
->ll_flags
& LL_SBI_RMT_CLIENT
)) {
322 sbi
->ll_flags
|= LL_SBI_RMT_CLIENT
;
323 LCONSOLE_INFO("client is set as remote by default.\n");
326 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
) {
327 sbi
->ll_flags
&= ~LL_SBI_RMT_CLIENT
;
328 LCONSOLE_INFO("client claims to be remote, but server rejected, forced to be local.\n");
332 if (data
->ocd_connect_flags
& OBD_CONNECT_64BITHASH
)
333 sbi
->ll_flags
|= LL_SBI_64BIT_HASH
;
335 if (data
->ocd_connect_flags
& OBD_CONNECT_BRW_SIZE
)
336 sbi
->ll_md_brw_size
= data
->ocd_brw_size
;
338 sbi
->ll_md_brw_size
= PAGE_CACHE_SIZE
;
340 if (data
->ocd_connect_flags
& OBD_CONNECT_LAYOUTLOCK
) {
341 LCONSOLE_INFO("Layout lock feature supported.\n");
342 sbi
->ll_flags
|= LL_SBI_LAYOUT_LOCK
;
345 if (data
->ocd_ibits_known
& MDS_INODELOCK_XATTR
) {
346 if (!(data
->ocd_connect_flags
& OBD_CONNECT_MAX_EASIZE
)) {
348 "%s: disabling xattr cache due to unknown maximum xattr size.\n",
351 sbi
->ll_flags
|= LL_SBI_XATTR_CACHE
;
352 sbi
->ll_xattr_cache_enabled
= 1;
356 obd
= class_name2obd(dt
);
358 CERROR("DT %s: not setup or attached\n", dt
);
363 data
->ocd_connect_flags
= OBD_CONNECT_GRANT
| OBD_CONNECT_VERSION
|
364 OBD_CONNECT_REQPORTAL
| OBD_CONNECT_BRW_SIZE
|
365 OBD_CONNECT_CANCELSET
| OBD_CONNECT_FID
|
366 OBD_CONNECT_SRVLOCK
| OBD_CONNECT_TRUNCLOCK
|
367 OBD_CONNECT_AT
| OBD_CONNECT_RMT_CLIENT
|
368 OBD_CONNECT_OSS_CAPA
| OBD_CONNECT_VBR
|
369 OBD_CONNECT_FULL20
| OBD_CONNECT_64BITHASH
|
370 OBD_CONNECT_MAXBYTES
|
371 OBD_CONNECT_EINPROGRESS
|
372 OBD_CONNECT_JOBSTATS
| OBD_CONNECT_LVB_TYPE
|
373 OBD_CONNECT_LAYOUTLOCK
| OBD_CONNECT_PINGLESS
;
375 if (sbi
->ll_flags
& LL_SBI_SOM_PREVIEW
)
376 data
->ocd_connect_flags
|= OBD_CONNECT_SOM
;
378 if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM
)) {
379 /* OBD_CONNECT_CKSUM should always be set, even if checksums are
380 * disabled by default, because it can still be enabled on the
381 * fly via /sys. As a consequence, we still need to come to an
382 * agreement on the supported algorithms at connect time
384 data
->ocd_connect_flags
|= OBD_CONNECT_CKSUM
;
386 if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY
))
387 data
->ocd_cksum_types
= OBD_CKSUM_ADLER
;
389 data
->ocd_cksum_types
= cksum_types_supported_client();
392 data
->ocd_connect_flags
|= OBD_CONNECT_LRU_RESIZE
;
393 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
)
394 data
->ocd_connect_flags
|= OBD_CONNECT_RMT_CLIENT_FORCE
;
396 CDEBUG(D_RPCTRACE
, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
397 data
->ocd_connect_flags
,
398 data
->ocd_version
, data
->ocd_grant
);
400 obd
->obd_upcall
.onu_owner
= &sbi
->ll_lco
;
401 obd
->obd_upcall
.onu_upcall
= cl_ocd_update
;
403 data
->ocd_brw_size
= DT_MAX_BRW_SIZE
;
405 err
= obd_connect(NULL
, &sbi
->ll_dt_exp
, obd
, &sbi
->ll_sb_uuid
, data
,
408 LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
412 CERROR("%s: Cannot connect to %s: rc = %d\n",
413 sbi
->ll_dt_exp
->exp_obd
->obd_name
, dt
, err
);
417 sbi
->ll_dt_exp
->exp_connect_data
= *data
;
419 err
= obd_fid_init(sbi
->ll_dt_exp
->exp_obd
, sbi
->ll_dt_exp
,
420 LUSTRE_SEQ_METADATA
);
422 CERROR("%s: Can't init data layer FID infrastructure, rc = %d\n",
423 sbi
->ll_dt_exp
->exp_obd
->obd_name
, err
);
427 mutex_lock(&sbi
->ll_lco
.lco_lock
);
428 sbi
->ll_lco
.lco_flags
= data
->ocd_connect_flags
;
429 sbi
->ll_lco
.lco_md_exp
= sbi
->ll_md_exp
;
430 sbi
->ll_lco
.lco_dt_exp
= sbi
->ll_dt_exp
;
431 mutex_unlock(&sbi
->ll_lco
.lco_lock
);
433 fid_zero(&sbi
->ll_root_fid
);
434 err
= md_getstatus(sbi
->ll_md_exp
, &sbi
->ll_root_fid
);
436 CERROR("cannot mds_connect: rc = %d\n", err
);
439 if (!fid_is_sane(&sbi
->ll_root_fid
)) {
440 CERROR("%s: Invalid root fid "DFID
" during mount\n",
441 sbi
->ll_md_exp
->exp_obd
->obd_name
,
442 PFID(&sbi
->ll_root_fid
));
446 CDEBUG(D_SUPER
, "rootfid "DFID
"\n", PFID(&sbi
->ll_root_fid
));
448 sb
->s_op
= &lustre_super_operations
;
449 #if THREAD_SIZE >= 8192 /*b=17630*/
450 sb
->s_export_op
= &lustre_export_operations
;
454 * XXX: move this to after cbd setup?
456 valid
= OBD_MD_FLGETATTR
| OBD_MD_FLBLOCKS
;
457 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
)
458 valid
|= OBD_MD_FLRMTPERM
;
459 else if (sbi
->ll_flags
& LL_SBI_ACL
)
460 valid
|= OBD_MD_FLACL
;
462 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
468 op_data
->op_fid1
= sbi
->ll_root_fid
;
469 op_data
->op_mode
= 0;
470 op_data
->op_valid
= valid
;
472 err
= md_getattr(sbi
->ll_md_exp
, op_data
, &request
);
475 CERROR("%s: md_getattr failed for root: rc = %d\n",
476 sbi
->ll_md_exp
->exp_obd
->obd_name
, err
);
480 err
= md_get_lustre_md(sbi
->ll_md_exp
, request
, sbi
->ll_dt_exp
,
481 sbi
->ll_md_exp
, &lmd
);
483 CERROR("failed to understand root inode md: rc = %d\n", err
);
484 ptlrpc_req_finished(request
);
488 LASSERT(fid_is_sane(&sbi
->ll_root_fid
));
489 root
= ll_iget(sb
, cl_fid_build_ino(&sbi
->ll_root_fid
,
490 sbi
->ll_flags
& LL_SBI_32BIT_API
),
492 md_free_lustre_md(sbi
->ll_md_exp
, &lmd
);
493 ptlrpc_req_finished(request
);
497 obd_free_memmd(sbi
->ll_dt_exp
, &lmd
.lsm
);
498 #ifdef CONFIG_FS_POSIX_ACL
500 posix_acl_release(lmd
.posix_acl
);
501 lmd
.posix_acl
= NULL
;
505 CERROR("lustre_lite: bad iget4 for root\n");
509 err
= ll_close_thread_start(&sbi
->ll_lcq
);
511 CERROR("cannot start close thread: rc %d\n", err
);
515 #ifdef CONFIG_FS_POSIX_ACL
516 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
) {
517 rct_init(&sbi
->ll_rct
);
518 et_init(&sbi
->ll_et
);
522 checksum
= sbi
->ll_flags
& LL_SBI_CHECKSUM
;
523 err
= obd_set_info_async(NULL
, sbi
->ll_dt_exp
, sizeof(KEY_CHECKSUM
),
524 KEY_CHECKSUM
, sizeof(checksum
), &checksum
,
528 err
= obd_set_info_async(NULL
, sbi
->ll_dt_exp
, sizeof(KEY_CACHE_SET
),
529 KEY_CACHE_SET
, sizeof(sbi
->ll_cache
),
530 &sbi
->ll_cache
, NULL
);
532 sb
->s_root
= d_make_root(root
);
534 CERROR("%s: can't make root dentry\n",
535 ll_get_fsname(sb
, NULL
, 0));
540 sbi
->ll_sdev_orig
= sb
->s_dev
;
542 /* We set sb->s_dev equal on all lustre clients in order to support
543 * NFS export clustering. NFSD requires that the FSID be the same
546 /* s_dev is also used in lt_compare() to compare two fs, but that is
547 * only a node-local comparison.
549 uuid
= obd_get_uuid(sbi
->ll_md_exp
);
551 sb
->s_dev
= get_uuid2int(uuid
->uuid
, strlen(uuid
->uuid
));
552 get_uuid2fsid(uuid
->uuid
, strlen(uuid
->uuid
), &sbi
->ll_fsid
);
562 obd_fid_fini(sbi
->ll_dt_exp
->exp_obd
);
564 obd_disconnect(sbi
->ll_dt_exp
);
565 sbi
->ll_dt_exp
= NULL
;
566 /* Make sure all OScs are gone, since cl_cache is accessing sbi. */
567 obd_zombie_barrier();
569 obd_fid_fini(sbi
->ll_md_exp
->exp_obd
);
571 obd_disconnect(sbi
->ll_md_exp
);
572 sbi
->ll_md_exp
= NULL
;
576 ldebugfs_unregister_mountpoint(sbi
);
580 int ll_get_max_mdsize(struct ll_sb_info
*sbi
, int *lmmsize
)
584 *lmmsize
= obd_size_diskmd(sbi
->ll_dt_exp
, NULL
);
586 rc
= obd_get_info(NULL
, sbi
->ll_md_exp
, sizeof(KEY_MAX_EASIZE
),
587 KEY_MAX_EASIZE
, &size
, lmmsize
, NULL
);
589 CERROR("Get max mdsize error rc %d\n", rc
);
594 int ll_get_default_mdsize(struct ll_sb_info
*sbi
, int *lmmsize
)
599 rc
= obd_get_info(NULL
, sbi
->ll_md_exp
, sizeof(KEY_DEFAULT_EASIZE
),
600 KEY_DEFAULT_EASIZE
, &size
, lmmsize
, NULL
);
602 CERROR("Get default mdsize error rc %d\n", rc
);
607 static void client_common_put_super(struct super_block
*sb
)
609 struct ll_sb_info
*sbi
= ll_s2sbi(sb
);
611 #ifdef CONFIG_FS_POSIX_ACL
612 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
) {
613 et_fini(&sbi
->ll_et
);
614 rct_fini(&sbi
->ll_rct
);
618 ll_close_thread_shutdown(sbi
->ll_lcq
);
622 obd_fid_fini(sbi
->ll_dt_exp
->exp_obd
);
623 obd_disconnect(sbi
->ll_dt_exp
);
624 sbi
->ll_dt_exp
= NULL
;
625 /* wait till all OSCs are gone, since cl_cache is accessing sbi.
628 obd_zombie_barrier();
630 ldebugfs_unregister_mountpoint(sbi
);
632 obd_fid_fini(sbi
->ll_md_exp
->exp_obd
);
633 obd_disconnect(sbi
->ll_md_exp
);
634 sbi
->ll_md_exp
= NULL
;
637 void ll_kill_super(struct super_block
*sb
)
639 struct ll_sb_info
*sbi
;
642 if (!(sb
->s_flags
& MS_ACTIVE
))
646 /* we need to restore s_dev from changed for clustered NFS before
647 * put_super because new kernels have cached s_dev and change sb->s_dev
648 * in put_super not affected real removing devices
651 sb
->s_dev
= sbi
->ll_sdev_orig
;
652 sbi
->ll_umounting
= 1;
656 static inline int ll_set_opt(const char *opt
, char *data
, int fl
)
658 if (strncmp(opt
, data
, strlen(opt
)) != 0)
664 /* non-client-specific mount options are parsed in lmd_parse */
665 static int ll_options(char *options
, int *flags
)
668 char *s1
= options
, *s2
;
673 CDEBUG(D_CONFIG
, "Parsing opts %s\n", options
);
676 CDEBUG(D_SUPER
, "next opt=%s\n", s1
);
677 tmp
= ll_set_opt("nolock", s1
, LL_SBI_NOLCK
);
682 tmp
= ll_set_opt("flock", s1
, LL_SBI_FLOCK
);
687 tmp
= ll_set_opt("localflock", s1
, LL_SBI_LOCALFLOCK
);
692 tmp
= ll_set_opt("noflock", s1
, LL_SBI_FLOCK
|LL_SBI_LOCALFLOCK
);
697 tmp
= ll_set_opt("user_xattr", s1
, LL_SBI_USER_XATTR
);
702 tmp
= ll_set_opt("nouser_xattr", s1
, LL_SBI_USER_XATTR
);
707 tmp
= ll_set_opt("remote_client", s1
, LL_SBI_RMT_CLIENT
);
712 tmp
= ll_set_opt("user_fid2path", s1
, LL_SBI_USER_FID2PATH
);
717 tmp
= ll_set_opt("nouser_fid2path", s1
, LL_SBI_USER_FID2PATH
);
723 tmp
= ll_set_opt("checksum", s1
, LL_SBI_CHECKSUM
);
728 tmp
= ll_set_opt("nochecksum", s1
, LL_SBI_CHECKSUM
);
733 tmp
= ll_set_opt("lruresize", s1
, LL_SBI_LRU_RESIZE
);
738 tmp
= ll_set_opt("nolruresize", s1
, LL_SBI_LRU_RESIZE
);
743 tmp
= ll_set_opt("lazystatfs", s1
, LL_SBI_LAZYSTATFS
);
748 tmp
= ll_set_opt("nolazystatfs", s1
, LL_SBI_LAZYSTATFS
);
753 tmp
= ll_set_opt("som_preview", s1
, LL_SBI_SOM_PREVIEW
);
758 tmp
= ll_set_opt("32bitapi", s1
, LL_SBI_32BIT_API
);
763 tmp
= ll_set_opt("verbose", s1
, LL_SBI_VERBOSE
);
768 tmp
= ll_set_opt("noverbose", s1
, LL_SBI_VERBOSE
);
773 LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
779 s2
= strchr(s1
, ',');
787 void ll_lli_init(struct ll_inode_info
*lli
)
789 lli
->lli_inode_magic
= LLI_INODE_MAGIC
;
791 lli
->lli_ioepoch
= 0;
792 lli
->lli_maxbytes
= MAX_LFS_FILESIZE
;
793 spin_lock_init(&lli
->lli_lock
);
794 lli
->lli_posix_acl
= NULL
;
795 lli
->lli_remote_perms
= NULL
;
796 mutex_init(&lli
->lli_rmtperm_mutex
);
797 /* Do not set lli_fid, it has been initialized already. */
798 fid_zero(&lli
->lli_pfid
);
799 INIT_LIST_HEAD(&lli
->lli_close_list
);
800 lli
->lli_rmtperm_time
= 0;
801 lli
->lli_pending_och
= NULL
;
802 lli
->lli_mds_read_och
= NULL
;
803 lli
->lli_mds_write_och
= NULL
;
804 lli
->lli_mds_exec_och
= NULL
;
805 lli
->lli_open_fd_read_count
= 0;
806 lli
->lli_open_fd_write_count
= 0;
807 lli
->lli_open_fd_exec_count
= 0;
808 mutex_init(&lli
->lli_och_mutex
);
809 spin_lock_init(&lli
->lli_agl_lock
);
810 lli
->lli_has_smd
= false;
811 spin_lock_init(&lli
->lli_layout_lock
);
812 ll_layout_version_set(lli
, LL_LAYOUT_GEN_NONE
);
813 lli
->lli_clob
= NULL
;
815 init_rwsem(&lli
->lli_xattrs_list_rwsem
);
816 mutex_init(&lli
->lli_xattrs_enq_lock
);
818 LASSERT(lli
->lli_vfs_inode
.i_mode
!= 0);
819 if (S_ISDIR(lli
->lli_vfs_inode
.i_mode
)) {
820 mutex_init(&lli
->lli_readdir_mutex
);
821 lli
->lli_opendir_key
= NULL
;
823 spin_lock_init(&lli
->lli_sa_lock
);
824 lli
->lli_opendir_pid
= 0;
826 mutex_init(&lli
->lli_size_mutex
);
827 lli
->lli_symlink_name
= NULL
;
828 init_rwsem(&lli
->lli_trunc_sem
);
829 mutex_init(&lli
->lli_write_mutex
);
830 init_rwsem(&lli
->lli_glimpse_sem
);
831 lli
->lli_glimpse_time
= 0;
832 INIT_LIST_HEAD(&lli
->lli_agl_list
);
833 lli
->lli_agl_index
= 0;
834 lli
->lli_async_rc
= 0;
836 mutex_init(&lli
->lli_layout_mutex
);
839 static inline int ll_bdi_register(struct backing_dev_info
*bdi
)
841 static atomic_t ll_bdi_num
= ATOMIC_INIT(0);
843 bdi
->name
= "lustre";
844 return bdi_register(bdi
, NULL
, "lustre-%d",
845 atomic_inc_return(&ll_bdi_num
));
848 int ll_fill_super(struct super_block
*sb
, struct vfsmount
*mnt
)
850 struct lustre_profile
*lprof
= NULL
;
851 struct lustre_sb_info
*lsi
= s2lsi(sb
);
852 struct ll_sb_info
*sbi
;
853 char *dt
= NULL
, *md
= NULL
;
854 char *profilenm
= get_profile_name(sb
);
855 struct config_llog_instance
*cfg
;
858 CDEBUG(D_VFSTRACE
, "VFS Op: sb %p\n", sb
);
860 cfg
= kzalloc(sizeof(*cfg
), GFP_NOFS
);
864 try_module_get(THIS_MODULE
);
866 /* client additional sb info */
867 lsi
->lsi_llsbi
= sbi
= ll_init_sbi(sb
);
869 module_put(THIS_MODULE
);
874 err
= ll_options(lsi
->lsi_lmd
->lmd_opts
, &sbi
->ll_flags
);
878 err
= bdi_init(&lsi
->lsi_bdi
);
881 lsi
->lsi_flags
|= LSI_BDI_INITIALIZED
;
882 lsi
->lsi_bdi
.capabilities
= 0;
883 err
= ll_bdi_register(&lsi
->lsi_bdi
);
887 sb
->s_bdi
= &lsi
->lsi_bdi
;
888 /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
889 sb
->s_d_op
= &ll_d_ops
;
891 /* Generate a string unique to this super, in case some joker tries
892 * to mount the same fs at two mount points.
893 * Use the address of the super itself.
895 cfg
->cfg_instance
= sb
;
896 cfg
->cfg_uuid
= lsi
->lsi_llsbi
->ll_sb_uuid
;
897 cfg
->cfg_callback
= class_config_llog_handler
;
898 /* set up client obds */
899 err
= lustre_process_log(sb
, profilenm
, cfg
);
901 CERROR("Unable to process log: %d\n", err
);
905 /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
906 lprof
= class_get_profile(profilenm
);
908 LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be read from the MGS. Does that filesystem exist?\n",
913 CDEBUG(D_CONFIG
, "Found profile %s: mdc=%s osc=%s\n", profilenm
,
914 lprof
->lp_md
, lprof
->lp_dt
);
916 dt
= kasprintf(GFP_NOFS
, "%s-%p", lprof
->lp_dt
, cfg
->cfg_instance
);
922 md
= kasprintf(GFP_NOFS
, "%s-%p", lprof
->lp_md
, cfg
->cfg_instance
);
928 /* connections, registrations, sb setup */
929 err
= client_common_fill_super(sb
, md
, dt
, mnt
);
936 else if (sbi
->ll_flags
& LL_SBI_VERBOSE
)
937 LCONSOLE_WARN("Mounted %s\n", profilenm
);
941 } /* ll_fill_super */
943 void ll_put_super(struct super_block
*sb
)
945 struct config_llog_instance cfg
, params_cfg
;
946 struct obd_device
*obd
;
947 struct lustre_sb_info
*lsi
= s2lsi(sb
);
948 struct ll_sb_info
*sbi
= ll_s2sbi(sb
);
949 char *profilenm
= get_profile_name(sb
);
952 CDEBUG(D_VFSTRACE
, "VFS Op: sb %p - %s\n", sb
, profilenm
);
954 cfg
.cfg_instance
= sb
;
955 lustre_end_log(sb
, profilenm
, &cfg
);
957 params_cfg
.cfg_instance
= sb
;
958 lustre_end_log(sb
, PARAMS_FILENAME
, ¶ms_cfg
);
960 if (sbi
->ll_md_exp
) {
961 obd
= class_exp2obd(sbi
->ll_md_exp
);
963 force
= obd
->obd_force
;
966 /* We need to set force before the lov_disconnect in
967 * lustre_common_put_super, since l_d cleans up osc's as well.
971 while ((obd
= class_devices_in_group(&sbi
->ll_sb_uuid
,
973 obd
->obd_force
= force
;
978 /* Only if client_common_fill_super succeeded */
979 client_common_put_super(sb
);
983 while ((obd
= class_devices_in_group(&sbi
->ll_sb_uuid
, &next
)))
984 class_manual_cleanup(obd
);
986 if (sbi
->ll_flags
& LL_SBI_VERBOSE
)
987 LCONSOLE_WARN("Unmounted %s\n", profilenm
? profilenm
: "");
990 class_del_profile(profilenm
);
992 if (lsi
->lsi_flags
& LSI_BDI_INITIALIZED
) {
993 bdi_destroy(&lsi
->lsi_bdi
);
994 lsi
->lsi_flags
&= ~LSI_BDI_INITIALIZED
;
998 lsi
->lsi_llsbi
= NULL
;
1000 lustre_common_put_super(sb
);
1002 module_put(THIS_MODULE
);
1003 } /* client_put_super */
1005 struct inode
*ll_inode_from_resource_lock(struct ldlm_lock
*lock
)
1007 struct inode
*inode
= NULL
;
1009 /* NOTE: we depend on atomic igrab() -bzzz */
1010 lock_res_and_lock(lock
);
1011 if (lock
->l_resource
->lr_lvb_inode
) {
1012 struct ll_inode_info
*lli
;
1014 lli
= ll_i2info(lock
->l_resource
->lr_lvb_inode
);
1015 if (lli
->lli_inode_magic
== LLI_INODE_MAGIC
) {
1016 inode
= igrab(lock
->l_resource
->lr_lvb_inode
);
1018 inode
= lock
->l_resource
->lr_lvb_inode
;
1019 LDLM_DEBUG_LIMIT(inode
->i_state
& I_FREEING
? D_INFO
:
1020 D_WARNING
, lock
, "lr_lvb_inode %p is bogus: magic %08x",
1021 lock
->l_resource
->lr_lvb_inode
,
1022 lli
->lli_inode_magic
);
1026 unlock_res_and_lock(lock
);
1030 void ll_clear_inode(struct inode
*inode
)
1032 struct ll_inode_info
*lli
= ll_i2info(inode
);
1033 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1035 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p)\n", inode
->i_ino
,
1036 inode
->i_generation
, inode
);
1038 if (S_ISDIR(inode
->i_mode
)) {
1039 /* these should have been cleared in ll_file_release */
1040 LASSERT(!lli
->lli_opendir_key
);
1041 LASSERT(!lli
->lli_sai
);
1042 LASSERT(lli
->lli_opendir_pid
== 0);
1045 spin_lock(&lli
->lli_lock
);
1046 ll_i2info(inode
)->lli_flags
&= ~LLIF_MDS_SIZE_LOCK
;
1047 spin_unlock(&lli
->lli_lock
);
1048 md_null_inode(sbi
->ll_md_exp
, ll_inode2fid(inode
));
1050 LASSERT(!lli
->lli_open_fd_write_count
);
1051 LASSERT(!lli
->lli_open_fd_read_count
);
1052 LASSERT(!lli
->lli_open_fd_exec_count
);
1054 if (lli
->lli_mds_write_och
)
1055 ll_md_real_close(inode
, FMODE_WRITE
);
1056 if (lli
->lli_mds_exec_och
)
1057 ll_md_real_close(inode
, FMODE_EXEC
);
1058 if (lli
->lli_mds_read_och
)
1059 ll_md_real_close(inode
, FMODE_READ
);
1061 if (S_ISLNK(inode
->i_mode
)) {
1062 kfree(lli
->lli_symlink_name
);
1063 lli
->lli_symlink_name
= NULL
;
1066 ll_xattr_cache_destroy(inode
);
1068 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
) {
1069 LASSERT(!lli
->lli_posix_acl
);
1070 if (lli
->lli_remote_perms
) {
1071 free_rmtperm_hash(lli
->lli_remote_perms
);
1072 lli
->lli_remote_perms
= NULL
;
1075 #ifdef CONFIG_FS_POSIX_ACL
1076 else if (lli
->lli_posix_acl
) {
1077 LASSERT(atomic_read(&lli
->lli_posix_acl
->a_refcount
) == 1);
1078 LASSERT(!lli
->lli_remote_perms
);
1079 posix_acl_release(lli
->lli_posix_acl
);
1080 lli
->lli_posix_acl
= NULL
;
1083 lli
->lli_inode_magic
= LLI_INODE_DEAD
;
1085 if (!S_ISDIR(inode
->i_mode
))
1086 LASSERT(list_empty(&lli
->lli_agl_list
));
1089 * XXX This has to be done before lsm is freed below, because
1090 * cl_object still uses inode lsm.
1092 cl_inode_fini(inode
);
1093 lli
->lli_has_smd
= false;
1096 #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
1098 static int ll_md_setattr(struct dentry
*dentry
, struct md_op_data
*op_data
,
1099 struct md_open_data
**mod
)
1101 struct lustre_md md
;
1102 struct inode
*inode
= d_inode(dentry
);
1103 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1104 struct ptlrpc_request
*request
= NULL
;
1107 op_data
= ll_prep_md_op_data(op_data
, inode
, NULL
, NULL
, 0, 0,
1108 LUSTRE_OPC_ANY
, NULL
);
1109 if (IS_ERR(op_data
))
1110 return PTR_ERR(op_data
);
1112 rc
= md_setattr(sbi
->ll_md_exp
, op_data
, NULL
, 0, NULL
, 0,
1115 ptlrpc_req_finished(request
);
1116 if (rc
== -ENOENT
) {
1118 /* Unlinked special device node? Or just a race?
1119 * Pretend we did everything.
1121 if (!S_ISREG(inode
->i_mode
) &&
1122 !S_ISDIR(inode
->i_mode
)) {
1123 ia_valid
= op_data
->op_attr
.ia_valid
;
1124 op_data
->op_attr
.ia_valid
&= ~TIMES_SET_FLAGS
;
1125 rc
= simple_setattr(dentry
, &op_data
->op_attr
);
1126 op_data
->op_attr
.ia_valid
= ia_valid
;
1128 } else if (rc
!= -EPERM
&& rc
!= -EACCES
&& rc
!= -ETXTBSY
) {
1129 CERROR("md_setattr fails: rc = %d\n", rc
);
1134 rc
= md_get_lustre_md(sbi
->ll_md_exp
, request
, sbi
->ll_dt_exp
,
1135 sbi
->ll_md_exp
, &md
);
1137 ptlrpc_req_finished(request
);
1141 ia_valid
= op_data
->op_attr
.ia_valid
;
1142 /* inode size will be in cl_setattr_ost, can't do it now since dirty
1143 * cache is not cleared yet.
1145 op_data
->op_attr
.ia_valid
&= ~(TIMES_SET_FLAGS
| ATTR_SIZE
);
1146 rc
= simple_setattr(dentry
, &op_data
->op_attr
);
1147 op_data
->op_attr
.ia_valid
= ia_valid
;
1149 /* Extract epoch data if obtained. */
1150 op_data
->op_handle
= md
.body
->handle
;
1151 op_data
->op_ioepoch
= md
.body
->ioepoch
;
1153 ll_update_inode(inode
, &md
);
1154 ptlrpc_req_finished(request
);
1159 /* Close IO epoch and send Size-on-MDS attribute update. */
1160 static int ll_setattr_done_writing(struct inode
*inode
,
1161 struct md_op_data
*op_data
,
1162 struct md_open_data
*mod
)
1164 struct ll_inode_info
*lli
= ll_i2info(inode
);
1167 if (!S_ISREG(inode
->i_mode
))
1170 CDEBUG(D_INODE
, "Epoch %llu closed on "DFID
" for truncate\n",
1171 op_data
->op_ioepoch
, PFID(&lli
->lli_fid
));
1173 op_data
->op_flags
= MF_EPOCH_CLOSE
;
1174 ll_done_writing_attr(inode
, op_data
);
1175 ll_pack_inode2opdata(inode
, op_data
, NULL
);
1177 rc
= md_done_writing(ll_i2sbi(inode
)->ll_md_exp
, op_data
, mod
);
1179 /* MDS has instructed us to obtain Size-on-MDS attribute
1180 * from OSTs and send setattr to back to MDS.
1182 rc
= ll_som_update(inode
, op_data
);
1184 CERROR("inode %lu mdc truncate failed: rc = %d\n",
1189 /* If this inode has objects allocated to it (lsm != NULL), then the OST
1190 * object(s) determine the file size and mtime. Otherwise, the MDS will
1191 * keep these values until such a time that objects are allocated for it.
1192 * We do the MDS operations first, as it is checking permissions for us.
1193 * We don't to the MDS RPC if there is nothing that we want to store there,
1194 * otherwise there is no harm in updating mtime/atime on the MDS if we are
1195 * going to do an RPC anyways.
1197 * If we are doing a truncate, we will send the mtime and ctime updates
1198 * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
1199 * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
1202 * In case of HSMimport, we only set attr on MDS.
1204 int ll_setattr_raw(struct dentry
*dentry
, struct iattr
*attr
, bool hsm_import
)
1206 struct inode
*inode
= d_inode(dentry
);
1207 struct ll_inode_info
*lli
= ll_i2info(inode
);
1208 struct md_op_data
*op_data
= NULL
;
1209 struct md_open_data
*mod
= NULL
;
1210 bool file_is_released
= false;
1211 int rc
= 0, rc1
= 0;
1214 "%s: setattr inode %p/fid:" DFID
1215 " from %llu to %llu, valid %x, hsm_import %d\n",
1216 ll_get_fsname(inode
->i_sb
, NULL
, 0), inode
,
1217 PFID(&lli
->lli_fid
), i_size_read(inode
), attr
->ia_size
,
1218 attr
->ia_valid
, hsm_import
);
1220 if (attr
->ia_valid
& ATTR_SIZE
) {
1221 /* Check new size against VFS/VM file size limit and rlimit */
1222 rc
= inode_newsize_ok(inode
, attr
->ia_size
);
1226 /* The maximum Lustre file size is variable, based on the
1227 * OST maximum object size and number of stripes. This
1228 * needs another check in addition to the VFS check above.
1230 if (attr
->ia_size
> ll_file_maxbytes(inode
)) {
1231 CDEBUG(D_INODE
, "file "DFID
" too large %llu > %llu\n",
1232 PFID(&lli
->lli_fid
), attr
->ia_size
,
1233 ll_file_maxbytes(inode
));
1237 attr
->ia_valid
|= ATTR_MTIME
| ATTR_CTIME
;
1240 /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
1241 if (attr
->ia_valid
& TIMES_SET_FLAGS
) {
1242 if ((!uid_eq(current_fsuid(), inode
->i_uid
)) &&
1243 !capable(CFS_CAP_FOWNER
))
1247 /* We mark all of the fields "set" so MDS/OST does not re-set them */
1248 if (attr
->ia_valid
& ATTR_CTIME
) {
1249 attr
->ia_ctime
= CURRENT_TIME
;
1250 attr
->ia_valid
|= ATTR_CTIME_SET
;
1252 if (!(attr
->ia_valid
& ATTR_ATIME_SET
) &&
1253 (attr
->ia_valid
& ATTR_ATIME
)) {
1254 attr
->ia_atime
= CURRENT_TIME
;
1255 attr
->ia_valid
|= ATTR_ATIME_SET
;
1257 if (!(attr
->ia_valid
& ATTR_MTIME_SET
) &&
1258 (attr
->ia_valid
& ATTR_MTIME
)) {
1259 attr
->ia_mtime
= CURRENT_TIME
;
1260 attr
->ia_valid
|= ATTR_MTIME_SET
;
1263 if (attr
->ia_valid
& (ATTR_MTIME
| ATTR_CTIME
))
1264 CDEBUG(D_INODE
, "setting mtime %lu, ctime %lu, now = %llu\n",
1265 LTIME_S(attr
->ia_mtime
), LTIME_S(attr
->ia_ctime
),
1266 (s64
)ktime_get_real_seconds());
1268 /* If we are changing file size, file content is modified, flag it. */
1269 if (attr
->ia_valid
& ATTR_SIZE
) {
1270 attr
->ia_valid
|= MDS_OPEN_OWNEROVERRIDE
;
1271 spin_lock(&lli
->lli_lock
);
1272 lli
->lli_flags
|= LLIF_DATA_MODIFIED
;
1273 spin_unlock(&lli
->lli_lock
);
1276 /* We always do an MDS RPC, even if we're only changing the size;
1277 * only the MDS knows whether truncate() should fail with -ETXTBUSY
1280 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
1284 if (!S_ISDIR(inode
->i_mode
))
1285 inode_unlock(inode
);
1287 memcpy(&op_data
->op_attr
, attr
, sizeof(*attr
));
1289 /* Open epoch for truncate. */
1290 if (exp_connect_som(ll_i2mdexp(inode
)) &&
1291 (attr
->ia_valid
& (ATTR_SIZE
| ATTR_MTIME
| ATTR_MTIME_SET
)))
1292 op_data
->op_flags
= MF_EPOCH_OPEN
;
1294 /* truncate on a released file must failed with -ENODATA,
1295 * so size must not be set on MDS for released file
1296 * but other attributes must be set
1298 if (S_ISREG(inode
->i_mode
)) {
1299 struct lov_stripe_md
*lsm
;
1302 ll_layout_refresh(inode
, &gen
);
1303 lsm
= ccc_inode_lsm_get(inode
);
1304 if (lsm
&& lsm
->lsm_pattern
& LOV_PATTERN_F_RELEASED
)
1305 file_is_released
= true;
1306 ccc_inode_lsm_put(inode
, lsm
);
1309 /* if not in HSM import mode, clear size attr for released file
1310 * we clear the attribute send to MDT in op_data, not the original
1311 * received from caller in attr which is used later to
1312 * decide return code
1314 if (file_is_released
&& (attr
->ia_valid
& ATTR_SIZE
) && !hsm_import
)
1315 op_data
->op_attr
.ia_valid
&= ~ATTR_SIZE
;
1317 rc
= ll_md_setattr(dentry
, op_data
, &mod
);
1321 /* truncate failed (only when non HSM import), others succeed */
1322 if (file_is_released
) {
1323 if ((attr
->ia_valid
& ATTR_SIZE
) && !hsm_import
)
1330 /* RPC to MDT is sent, cancel data modification flag */
1331 if (op_data
->op_bias
& MDS_DATA_MODIFIED
) {
1332 spin_lock(&lli
->lli_lock
);
1333 lli
->lli_flags
&= ~LLIF_DATA_MODIFIED
;
1334 spin_unlock(&lli
->lli_lock
);
1337 ll_ioepoch_open(lli
, op_data
->op_ioepoch
);
1338 if (!S_ISREG(inode
->i_mode
)) {
1343 if (attr
->ia_valid
& (ATTR_SIZE
|
1344 ATTR_ATIME
| ATTR_ATIME_SET
|
1345 ATTR_MTIME
| ATTR_MTIME_SET
)) {
1346 /* For truncate and utimes sending attributes to OSTs, setting
1347 * mtime/atime to the past will be performed under PW [0:EOF]
1348 * extent lock (new_size:EOF for truncate). It may seem
1349 * excessive to send mtime/atime updates to OSTs when not
1350 * setting times to past, but it is necessary due to possible
1351 * time de-synchronization between MDT inode and OST objects
1353 if (attr
->ia_valid
& ATTR_SIZE
)
1354 down_write(&lli
->lli_trunc_sem
);
1355 rc
= cl_setattr_ost(inode
, attr
);
1356 if (attr
->ia_valid
& ATTR_SIZE
)
1357 up_write(&lli
->lli_trunc_sem
);
1360 if (op_data
->op_ioepoch
) {
1361 rc1
= ll_setattr_done_writing(inode
, op_data
, mod
);
1365 ll_finish_md_op_data(op_data
);
1367 if (!S_ISDIR(inode
->i_mode
)) {
1369 if ((attr
->ia_valid
& ATTR_SIZE
) && !hsm_import
)
1370 inode_dio_wait(inode
);
1373 ll_stats_ops_tally(ll_i2sbi(inode
), (attr
->ia_valid
& ATTR_SIZE
) ?
1374 LPROC_LL_TRUNC
: LPROC_LL_SETATTR
, 1);
1379 int ll_setattr(struct dentry
*de
, struct iattr
*attr
)
1381 int mode
= d_inode(de
)->i_mode
;
1383 if ((attr
->ia_valid
& (ATTR_CTIME
|ATTR_SIZE
|ATTR_MODE
)) ==
1384 (ATTR_CTIME
|ATTR_SIZE
|ATTR_MODE
))
1385 attr
->ia_valid
|= MDS_OPEN_OWNEROVERRIDE
;
1387 if (((attr
->ia_valid
& (ATTR_MODE
|ATTR_FORCE
|ATTR_SIZE
)) ==
1388 (ATTR_SIZE
|ATTR_MODE
)) &&
1389 (((mode
& S_ISUID
) && !(attr
->ia_mode
& S_ISUID
)) ||
1390 (((mode
& (S_ISGID
|S_IXGRP
)) == (S_ISGID
|S_IXGRP
)) &&
1391 !(attr
->ia_mode
& S_ISGID
))))
1392 attr
->ia_valid
|= ATTR_FORCE
;
1394 if ((attr
->ia_valid
& ATTR_MODE
) &&
1396 !(attr
->ia_mode
& S_ISUID
) &&
1397 !(attr
->ia_valid
& ATTR_KILL_SUID
))
1398 attr
->ia_valid
|= ATTR_KILL_SUID
;
1400 if ((attr
->ia_valid
& ATTR_MODE
) &&
1401 ((mode
& (S_ISGID
|S_IXGRP
)) == (S_ISGID
|S_IXGRP
)) &&
1402 !(attr
->ia_mode
& S_ISGID
) &&
1403 !(attr
->ia_valid
& ATTR_KILL_SGID
))
1404 attr
->ia_valid
|= ATTR_KILL_SGID
;
1406 return ll_setattr_raw(de
, attr
, false);
1409 int ll_statfs_internal(struct super_block
*sb
, struct obd_statfs
*osfs
,
1410 __u64 max_age
, __u32 flags
)
1412 struct ll_sb_info
*sbi
= ll_s2sbi(sb
);
1413 struct obd_statfs obd_osfs
;
1416 rc
= obd_statfs(NULL
, sbi
->ll_md_exp
, osfs
, max_age
, flags
);
1418 CERROR("md_statfs fails: rc = %d\n", rc
);
1422 osfs
->os_type
= sb
->s_magic
;
1424 CDEBUG(D_SUPER
, "MDC blocks %llu/%llu objects %llu/%llu\n",
1425 osfs
->os_bavail
, osfs
->os_blocks
, osfs
->os_ffree
,
1428 if (sbi
->ll_flags
& LL_SBI_LAZYSTATFS
)
1429 flags
|= OBD_STATFS_NODELAY
;
1431 rc
= obd_statfs_rqset(sbi
->ll_dt_exp
, &obd_osfs
, max_age
, flags
);
1433 CERROR("obd_statfs fails: rc = %d\n", rc
);
1437 CDEBUG(D_SUPER
, "OSC blocks %llu/%llu objects %llu/%llu\n",
1438 obd_osfs
.os_bavail
, obd_osfs
.os_blocks
, obd_osfs
.os_ffree
,
1441 osfs
->os_bsize
= obd_osfs
.os_bsize
;
1442 osfs
->os_blocks
= obd_osfs
.os_blocks
;
1443 osfs
->os_bfree
= obd_osfs
.os_bfree
;
1444 osfs
->os_bavail
= obd_osfs
.os_bavail
;
1446 /* If we don't have as many objects free on the OST as inodes
1447 * on the MDS, we reduce the total number of inodes to
1448 * compensate, so that the "inodes in use" number is correct.
1450 if (obd_osfs
.os_ffree
< osfs
->os_ffree
) {
1451 osfs
->os_files
= (osfs
->os_files
- osfs
->os_ffree
) +
1453 osfs
->os_ffree
= obd_osfs
.os_ffree
;
1459 int ll_statfs(struct dentry
*de
, struct kstatfs
*sfs
)
1461 struct super_block
*sb
= de
->d_sb
;
1462 struct obd_statfs osfs
;
1465 CDEBUG(D_VFSTRACE
, "VFS Op: at %llu jiffies\n", get_jiffies_64());
1466 ll_stats_ops_tally(ll_s2sbi(sb
), LPROC_LL_STAFS
, 1);
1468 /* Some amount of caching on the client is allowed */
1469 rc
= ll_statfs_internal(sb
, &osfs
,
1470 cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS
),
1475 statfs_unpack(sfs
, &osfs
);
1477 /* We need to downshift for all 32-bit kernels, because we can't
1478 * tell if the kernel is being called via sys_statfs64() or not.
1479 * Stop before overflowing f_bsize - in which case it is better
1480 * to just risk EOVERFLOW if caller is using old sys_statfs().
1482 if (sizeof(long) < 8) {
1483 while (osfs
.os_blocks
> ~0UL && sfs
->f_bsize
< 0x40000000) {
1486 osfs
.os_blocks
>>= 1;
1487 osfs
.os_bfree
>>= 1;
1488 osfs
.os_bavail
>>= 1;
1492 sfs
->f_blocks
= osfs
.os_blocks
;
1493 sfs
->f_bfree
= osfs
.os_bfree
;
1494 sfs
->f_bavail
= osfs
.os_bavail
;
1495 sfs
->f_fsid
= ll_s2sbi(sb
)->ll_fsid
;
1499 void ll_inode_size_lock(struct inode
*inode
)
1501 struct ll_inode_info
*lli
;
1503 LASSERT(!S_ISDIR(inode
->i_mode
));
1505 lli
= ll_i2info(inode
);
1506 mutex_lock(&lli
->lli_size_mutex
);
1509 void ll_inode_size_unlock(struct inode
*inode
)
1511 struct ll_inode_info
*lli
;
1513 lli
= ll_i2info(inode
);
1514 mutex_unlock(&lli
->lli_size_mutex
);
1517 void ll_update_inode(struct inode
*inode
, struct lustre_md
*md
)
1519 struct ll_inode_info
*lli
= ll_i2info(inode
);
1520 struct mdt_body
*body
= md
->body
;
1521 struct lov_stripe_md
*lsm
= md
->lsm
;
1522 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1524 LASSERT((lsm
!= NULL
) == ((body
->valid
& OBD_MD_FLEASIZE
) != 0));
1526 if (!lli
->lli_has_smd
&&
1527 !(sbi
->ll_flags
& LL_SBI_LAYOUT_LOCK
))
1528 cl_file_inode_init(inode
, md
);
1530 lli
->lli_maxbytes
= lsm
->lsm_maxbytes
;
1531 if (lli
->lli_maxbytes
> MAX_LFS_FILESIZE
)
1532 lli
->lli_maxbytes
= MAX_LFS_FILESIZE
;
1535 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
) {
1536 if (body
->valid
& OBD_MD_FLRMTPERM
)
1537 ll_update_remote_perm(inode
, md
->remote_perm
);
1539 #ifdef CONFIG_FS_POSIX_ACL
1540 else if (body
->valid
& OBD_MD_FLACL
) {
1541 spin_lock(&lli
->lli_lock
);
1542 if (lli
->lli_posix_acl
)
1543 posix_acl_release(lli
->lli_posix_acl
);
1544 lli
->lli_posix_acl
= md
->posix_acl
;
1545 spin_unlock(&lli
->lli_lock
);
1548 inode
->i_ino
= cl_fid_build_ino(&body
->fid1
,
1549 sbi
->ll_flags
& LL_SBI_32BIT_API
);
1550 inode
->i_generation
= cl_fid_build_gen(&body
->fid1
);
1552 if (body
->valid
& OBD_MD_FLATIME
) {
1553 if (body
->atime
> LTIME_S(inode
->i_atime
))
1554 LTIME_S(inode
->i_atime
) = body
->atime
;
1555 lli
->lli_lvb
.lvb_atime
= body
->atime
;
1557 if (body
->valid
& OBD_MD_FLMTIME
) {
1558 if (body
->mtime
> LTIME_S(inode
->i_mtime
)) {
1559 CDEBUG(D_INODE
, "setting ino %lu mtime from %lu to %llu\n",
1560 inode
->i_ino
, LTIME_S(inode
->i_mtime
),
1562 LTIME_S(inode
->i_mtime
) = body
->mtime
;
1564 lli
->lli_lvb
.lvb_mtime
= body
->mtime
;
1566 if (body
->valid
& OBD_MD_FLCTIME
) {
1567 if (body
->ctime
> LTIME_S(inode
->i_ctime
))
1568 LTIME_S(inode
->i_ctime
) = body
->ctime
;
1569 lli
->lli_lvb
.lvb_ctime
= body
->ctime
;
1571 if (body
->valid
& OBD_MD_FLMODE
)
1572 inode
->i_mode
= (inode
->i_mode
& S_IFMT
)|(body
->mode
& ~S_IFMT
);
1573 if (body
->valid
& OBD_MD_FLTYPE
)
1574 inode
->i_mode
= (inode
->i_mode
& ~S_IFMT
)|(body
->mode
& S_IFMT
);
1575 LASSERT(inode
->i_mode
!= 0);
1576 if (S_ISREG(inode
->i_mode
))
1577 inode
->i_blkbits
= min(PTLRPC_MAX_BRW_BITS
+ 1,
1578 LL_MAX_BLKSIZE_BITS
);
1580 inode
->i_blkbits
= inode
->i_sb
->s_blocksize_bits
;
1581 if (body
->valid
& OBD_MD_FLUID
)
1582 inode
->i_uid
= make_kuid(&init_user_ns
, body
->uid
);
1583 if (body
->valid
& OBD_MD_FLGID
)
1584 inode
->i_gid
= make_kgid(&init_user_ns
, body
->gid
);
1585 if (body
->valid
& OBD_MD_FLFLAGS
)
1586 inode
->i_flags
= ll_ext_to_inode_flags(body
->flags
);
1587 if (body
->valid
& OBD_MD_FLNLINK
)
1588 set_nlink(inode
, body
->nlink
);
1589 if (body
->valid
& OBD_MD_FLRDEV
)
1590 inode
->i_rdev
= old_decode_dev(body
->rdev
);
1592 if (body
->valid
& OBD_MD_FLID
) {
1593 /* FID shouldn't be changed! */
1594 if (fid_is_sane(&lli
->lli_fid
)) {
1595 LASSERTF(lu_fid_eq(&lli
->lli_fid
, &body
->fid1
),
1596 "Trying to change FID "DFID
1597 " to the "DFID
", inode %lu/%u(%p)\n",
1598 PFID(&lli
->lli_fid
), PFID(&body
->fid1
),
1599 inode
->i_ino
, inode
->i_generation
, inode
);
1601 lli
->lli_fid
= body
->fid1
;
1604 LASSERT(fid_seq(&lli
->lli_fid
) != 0);
1606 if (body
->valid
& OBD_MD_FLSIZE
) {
1607 if (exp_connect_som(ll_i2mdexp(inode
)) &&
1608 S_ISREG(inode
->i_mode
)) {
1609 struct lustre_handle lockh
;
1610 enum ldlm_mode mode
;
1612 /* As it is possible a blocking ast has been processed
1613 * by this time, we need to check there is an UPDATE
1614 * lock on the client and set LLIF_MDS_SIZE_LOCK holding
1617 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_UPDATE
,
1618 &lockh
, LDLM_FL_CBPENDING
,
1622 if (lli
->lli_flags
& (LLIF_DONE_WRITING
|
1623 LLIF_EPOCH_PENDING
|
1625 CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n",
1626 inode
->i_ino
, lli
->lli_flags
);
1628 /* Use old size assignment to avoid
1629 * deadlock bz14138 & bz14326
1631 i_size_write(inode
, body
->size
);
1632 spin_lock(&lli
->lli_lock
);
1633 lli
->lli_flags
|= LLIF_MDS_SIZE_LOCK
;
1634 spin_unlock(&lli
->lli_lock
);
1636 ldlm_lock_decref(&lockh
, mode
);
1639 /* Use old size assignment to avoid
1640 * deadlock bz14138 & bz14326
1642 i_size_write(inode
, body
->size
);
1644 CDEBUG(D_VFSTRACE
, "inode=%lu, updating i_size %llu\n",
1645 inode
->i_ino
, (unsigned long long)body
->size
);
1648 if (body
->valid
& OBD_MD_FLBLOCKS
)
1649 inode
->i_blocks
= body
->blocks
;
1652 if (body
->valid
& OBD_MD_TSTATE
) {
1653 if (body
->t_state
& MS_RESTORE
)
1654 lli
->lli_flags
|= LLIF_FILE_RESTORING
;
1658 void ll_read_inode2(struct inode
*inode
, void *opaque
)
1660 struct lustre_md
*md
= opaque
;
1661 struct ll_inode_info
*lli
= ll_i2info(inode
);
1663 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p)\n",
1664 PFID(&lli
->lli_fid
), inode
);
1666 LASSERT(!lli
->lli_has_smd
);
1668 /* Core attributes from the MDS first. This is a new inode, and
1669 * the VFS doesn't zero times in the core inode so we have to do
1670 * it ourselves. They will be overwritten by either MDS or OST
1671 * attributes - we just need to make sure they aren't newer.
1673 LTIME_S(inode
->i_mtime
) = 0;
1674 LTIME_S(inode
->i_atime
) = 0;
1675 LTIME_S(inode
->i_ctime
) = 0;
1677 ll_update_inode(inode
, md
);
1679 /* OIDEBUG(inode); */
1681 if (S_ISREG(inode
->i_mode
)) {
1682 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1684 inode
->i_op
= &ll_file_inode_operations
;
1685 inode
->i_fop
= sbi
->ll_fop
;
1686 inode
->i_mapping
->a_ops
= (struct address_space_operations
*)&ll_aops
;
1687 } else if (S_ISDIR(inode
->i_mode
)) {
1688 inode
->i_op
= &ll_dir_inode_operations
;
1689 inode
->i_fop
= &ll_dir_operations
;
1690 } else if (S_ISLNK(inode
->i_mode
)) {
1691 inode
->i_op
= &ll_fast_symlink_inode_operations
;
1693 inode
->i_op
= &ll_special_inode_operations
;
1695 init_special_inode(inode
, inode
->i_mode
,
1700 void ll_delete_inode(struct inode
*inode
)
1702 struct cl_inode_info
*lli
= cl_i2info(inode
);
1704 if (S_ISREG(inode
->i_mode
) && lli
->lli_clob
)
1705 /* discard all dirty pages before truncating them, required by
1706 * osc_extent implementation at LU-1030.
1708 cl_sync_file_range(inode
, 0, OBD_OBJECT_EOF
,
1709 CL_FSYNC_DISCARD
, 1);
1711 truncate_inode_pages_final(&inode
->i_data
);
1713 /* Workaround for LU-118 */
1714 if (inode
->i_data
.nrpages
) {
1715 spin_lock_irq(&inode
->i_data
.tree_lock
);
1716 spin_unlock_irq(&inode
->i_data
.tree_lock
);
1717 LASSERTF(inode
->i_data
.nrpages
== 0,
1718 "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
1719 inode
->i_ino
, inode
->i_generation
, inode
,
1720 inode
->i_data
.nrpages
);
1722 /* Workaround end */
1724 ll_clear_inode(inode
);
1728 int ll_iocontrol(struct inode
*inode
, struct file
*file
,
1729 unsigned int cmd
, unsigned long arg
)
1731 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1732 struct ptlrpc_request
*req
= NULL
;
1736 case FSFILT_IOC_GETFLAGS
: {
1737 struct mdt_body
*body
;
1738 struct md_op_data
*op_data
;
1740 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
,
1741 0, 0, LUSTRE_OPC_ANY
,
1743 if (IS_ERR(op_data
))
1744 return PTR_ERR(op_data
);
1746 op_data
->op_valid
= OBD_MD_FLFLAGS
;
1747 rc
= md_getattr(sbi
->ll_md_exp
, op_data
, &req
);
1748 ll_finish_md_op_data(op_data
);
1750 CERROR("failure %d inode %lu\n", rc
, inode
->i_ino
);
1754 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1756 flags
= body
->flags
;
1758 ptlrpc_req_finished(req
);
1760 return put_user(flags
, (int __user
*)arg
);
1762 case FSFILT_IOC_SETFLAGS
: {
1763 struct lov_stripe_md
*lsm
;
1764 struct obd_info oinfo
= { };
1765 struct md_op_data
*op_data
;
1767 if (get_user(flags
, (int __user
*)arg
))
1770 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
1771 LUSTRE_OPC_ANY
, NULL
);
1772 if (IS_ERR(op_data
))
1773 return PTR_ERR(op_data
);
1775 ((struct ll_iattr
*)&op_data
->op_attr
)->ia_attr_flags
= flags
;
1776 op_data
->op_attr
.ia_valid
|= ATTR_ATTR_FLAG
;
1777 rc
= md_setattr(sbi
->ll_md_exp
, op_data
,
1778 NULL
, 0, NULL
, 0, &req
, NULL
);
1779 ll_finish_md_op_data(op_data
);
1780 ptlrpc_req_finished(req
);
1784 inode
->i_flags
= ll_ext_to_inode_flags(flags
);
1786 lsm
= ccc_inode_lsm_get(inode
);
1787 if (!lsm_has_objects(lsm
)) {
1788 ccc_inode_lsm_put(inode
, lsm
);
1792 oinfo
.oi_oa
= kmem_cache_zalloc(obdo_cachep
, GFP_NOFS
);
1794 ccc_inode_lsm_put(inode
, lsm
);
1798 oinfo
.oi_oa
->o_oi
= lsm
->lsm_oi
;
1799 oinfo
.oi_oa
->o_flags
= flags
;
1800 oinfo
.oi_oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLFLAGS
|
1802 obdo_set_parent_fid(oinfo
.oi_oa
, &ll_i2info(inode
)->lli_fid
);
1803 rc
= obd_setattr_rqset(sbi
->ll_dt_exp
, &oinfo
, NULL
);
1804 kmem_cache_free(obdo_cachep
, oinfo
.oi_oa
);
1805 ccc_inode_lsm_put(inode
, lsm
);
1807 if (rc
&& rc
!= -EPERM
&& rc
!= -EACCES
)
1808 CERROR("osc_setattr_async fails: rc = %d\n", rc
);
1819 int ll_flush_ctx(struct inode
*inode
)
1821 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1823 CDEBUG(D_SEC
, "flush context for user %d\n",
1824 from_kuid(&init_user_ns
, current_uid()));
1826 obd_set_info_async(NULL
, sbi
->ll_md_exp
,
1827 sizeof(KEY_FLUSH_CTX
), KEY_FLUSH_CTX
,
1829 obd_set_info_async(NULL
, sbi
->ll_dt_exp
,
1830 sizeof(KEY_FLUSH_CTX
), KEY_FLUSH_CTX
,
1835 /* umount -f client means force down, don't save state */
1836 void ll_umount_begin(struct super_block
*sb
)
1838 struct ll_sb_info
*sbi
= ll_s2sbi(sb
);
1839 struct obd_device
*obd
;
1840 struct obd_ioctl_data
*ioc_data
;
1842 CDEBUG(D_VFSTRACE
, "VFS Op: superblock %p count %d active %d\n", sb
,
1843 sb
->s_count
, atomic_read(&sb
->s_active
));
1845 obd
= class_exp2obd(sbi
->ll_md_exp
);
1847 CERROR("Invalid MDC connection handle %#llx\n",
1848 sbi
->ll_md_exp
->exp_handle
.h_cookie
);
1853 obd
= class_exp2obd(sbi
->ll_dt_exp
);
1855 CERROR("Invalid LOV connection handle %#llx\n",
1856 sbi
->ll_dt_exp
->exp_handle
.h_cookie
);
1861 ioc_data
= kzalloc(sizeof(*ioc_data
), GFP_NOFS
);
1863 obd_iocontrol(IOC_OSC_SET_ACTIVE
, sbi
->ll_md_exp
,
1864 sizeof(*ioc_data
), ioc_data
, NULL
);
1866 obd_iocontrol(IOC_OSC_SET_ACTIVE
, sbi
->ll_dt_exp
,
1867 sizeof(*ioc_data
), ioc_data
, NULL
);
1872 /* Really, we'd like to wait until there are no requests outstanding,
1873 * and then continue. For now, we just invalidate the requests,
1874 * schedule() and sleep one second if needed, and hope.
1879 int ll_remount_fs(struct super_block
*sb
, int *flags
, char *data
)
1881 struct ll_sb_info
*sbi
= ll_s2sbi(sb
);
1882 char *profilenm
= get_profile_name(sb
);
1886 if ((*flags
& MS_RDONLY
) != (sb
->s_flags
& MS_RDONLY
)) {
1887 read_only
= *flags
& MS_RDONLY
;
1888 err
= obd_set_info_async(NULL
, sbi
->ll_md_exp
,
1889 sizeof(KEY_READ_ONLY
),
1890 KEY_READ_ONLY
, sizeof(read_only
),
1893 LCONSOLE_WARN("Failed to remount %s %s (%d)\n",
1894 profilenm
, read_only
?
1895 "read-only" : "read-write", err
);
1900 sb
->s_flags
|= MS_RDONLY
;
1902 sb
->s_flags
&= ~MS_RDONLY
;
1904 if (sbi
->ll_flags
& LL_SBI_VERBOSE
)
1905 LCONSOLE_WARN("Remounted %s %s\n", profilenm
,
1906 read_only
? "read-only" : "read-write");
1912 * Cleanup the open handle that is cached on MDT-side.
1914 * For open case, the client side open handling thread may hit error
1915 * after the MDT grant the open. Under such case, the client should
1916 * send close RPC to the MDT as cleanup; otherwise, the open handle
1917 * on the MDT will be leaked there until the client umount or evicted.
1919 * In further, if someone unlinked the file, because the open handle
1920 * holds the reference on such file/object, then it will block the
1921 * subsequent threads that want to locate such object via FID.
1923 * \param[in] sb super block for this file-system
1924 * \param[in] open_req pointer to the original open request
1926 void ll_open_cleanup(struct super_block
*sb
, struct ptlrpc_request
*open_req
)
1928 struct mdt_body
*body
;
1929 struct md_op_data
*op_data
;
1930 struct ptlrpc_request
*close_req
= NULL
;
1931 struct obd_export
*exp
= ll_s2sbi(sb
)->ll_md_exp
;
1933 body
= req_capsule_server_get(&open_req
->rq_pill
, &RMF_MDT_BODY
);
1934 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
1938 op_data
->op_fid1
= body
->fid1
;
1939 op_data
->op_ioepoch
= body
->ioepoch
;
1940 op_data
->op_handle
= body
->handle
;
1941 op_data
->op_mod_time
= get_seconds();
1942 md_close(exp
, op_data
, NULL
, &close_req
);
1943 ptlrpc_req_finished(close_req
);
1944 ll_finish_md_op_data(op_data
);
1947 int ll_prep_inode(struct inode
**inode
, struct ptlrpc_request
*req
,
1948 struct super_block
*sb
, struct lookup_intent
*it
)
1950 struct ll_sb_info
*sbi
= NULL
;
1951 struct lustre_md md
= { NULL
};
1954 LASSERT(*inode
|| sb
);
1955 sbi
= sb
? ll_s2sbi(sb
) : ll_i2sbi(*inode
);
1956 rc
= md_get_lustre_md(sbi
->ll_md_exp
, req
, sbi
->ll_dt_exp
,
1957 sbi
->ll_md_exp
, &md
);
1962 ll_update_inode(*inode
, &md
);
1967 * At this point server returns to client's same fid as client
1968 * generated for creating. So using ->fid1 is okay here.
1970 LASSERT(fid_is_sane(&md
.body
->fid1
));
1972 *inode
= ll_iget(sb
, cl_fid_build_ino(&md
.body
->fid1
,
1973 sbi
->ll_flags
& LL_SBI_32BIT_API
),
1976 #ifdef CONFIG_FS_POSIX_ACL
1978 posix_acl_release(md
.posix_acl
);
1979 md
.posix_acl
= NULL
;
1983 CERROR("new_inode -fatal: rc %d\n", rc
);
1988 /* Handling piggyback layout lock.
1989 * Layout lock can be piggybacked by getattr and open request.
1990 * The lsm can be applied to inode only if it comes with a layout lock
1991 * otherwise correct layout may be overwritten, for example:
1992 * 1. proc1: mdt returns a lsm but not granting layout
1993 * 2. layout was changed by another client
1994 * 3. proc2: refresh layout and layout lock granted
1995 * 4. proc1: to apply a stale layout
1997 if (it
&& it
->d
.lustre
.it_lock_mode
!= 0) {
1998 struct lustre_handle lockh
;
1999 struct ldlm_lock
*lock
;
2001 lockh
.cookie
= it
->d
.lustre
.it_lock_handle
;
2002 lock
= ldlm_handle2lock(&lockh
);
2004 if (ldlm_has_layout(lock
)) {
2005 struct cl_object_conf conf
;
2007 memset(&conf
, 0, sizeof(conf
));
2008 conf
.coc_opc
= OBJECT_CONF_SET
;
2009 conf
.coc_inode
= *inode
;
2010 conf
.coc_lock
= lock
;
2011 conf
.u
.coc_md
= &md
;
2012 (void)ll_layout_conf(*inode
, &conf
);
2014 LDLM_LOCK_PUT(lock
);
2019 obd_free_memmd(sbi
->ll_dt_exp
, &md
.lsm
);
2020 md_free_lustre_md(sbi
->ll_md_exp
, &md
);
2023 if (rc
!= 0 && it
&& it
->it_op
& IT_OPEN
)
2024 ll_open_cleanup(sb
? sb
: (*inode
)->i_sb
, req
);
2029 int ll_obd_statfs(struct inode
*inode
, void __user
*arg
)
2031 struct ll_sb_info
*sbi
= NULL
;
2032 struct obd_export
*exp
;
2034 struct obd_ioctl_data
*data
= NULL
;
2043 sbi
= ll_i2sbi(inode
);
2049 rc
= obd_ioctl_getdata(&buf
, &len
, arg
);
2054 if (!data
->ioc_inlbuf1
|| !data
->ioc_inlbuf2
||
2055 !data
->ioc_pbuf1
|| !data
->ioc_pbuf2
) {
2060 if (data
->ioc_inllen1
!= sizeof(__u32
) ||
2061 data
->ioc_inllen2
!= sizeof(__u32
) ||
2062 data
->ioc_plen1
!= sizeof(struct obd_statfs
) ||
2063 data
->ioc_plen2
!= sizeof(struct obd_uuid
)) {
2068 memcpy(&type
, data
->ioc_inlbuf1
, sizeof(__u32
));
2069 if (type
& LL_STATFS_LMV
)
2070 exp
= sbi
->ll_md_exp
;
2071 else if (type
& LL_STATFS_LOV
)
2072 exp
= sbi
->ll_dt_exp
;
2078 rc
= obd_iocontrol(IOC_OBD_STATFS
, exp
, len
, buf
, NULL
);
2083 obd_ioctl_freedata(buf
, len
);
2087 int ll_process_config(struct lustre_cfg
*lcfg
)
2091 struct lprocfs_static_vars lvars
;
2095 lprocfs_llite_init_vars(&lvars
);
2097 /* The instance name contains the sb: lustre-client-aacfe000 */
2098 ptr
= strrchr(lustre_cfg_string(lcfg
, 0), '-');
2099 if (!ptr
|| !*(++ptr
))
2101 rc
= kstrtoul(ptr
, 16, &x
);
2105 /* This better be a real Lustre superblock! */
2106 LASSERT(s2lsi((struct super_block
*)sb
)->lsi_lmd
->lmd_magic
== LMD_MAGIC
);
2108 /* Note we have not called client_common_fill_super yet, so
2109 * proc fns must be able to handle that!
2111 rc
= class_process_proc_param(PARAM_LLITE
, lvars
.obd_vars
,
2118 /* this function prepares md_op_data hint for passing ot down to MD stack. */
2119 struct md_op_data
*ll_prep_md_op_data(struct md_op_data
*op_data
,
2120 struct inode
*i1
, struct inode
*i2
,
2121 const char *name
, int namelen
,
2122 int mode
, __u32 opc
, void *data
)
2124 if (namelen
> ll_i2sbi(i1
)->ll_namelen
)
2125 return ERR_PTR(-ENAMETOOLONG
);
2128 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
2131 return ERR_PTR(-ENOMEM
);
2133 ll_i2gids(op_data
->op_suppgids
, i1
, i2
);
2134 op_data
->op_fid1
= *ll_inode2fid(i1
);
2137 op_data
->op_fid2
= *ll_inode2fid(i2
);
2139 fid_zero(&op_data
->op_fid2
);
2141 op_data
->op_name
= name
;
2142 op_data
->op_namelen
= namelen
;
2143 op_data
->op_mode
= mode
;
2144 op_data
->op_mod_time
= ktime_get_real_seconds();
2145 op_data
->op_fsuid
= from_kuid(&init_user_ns
, current_fsuid());
2146 op_data
->op_fsgid
= from_kgid(&init_user_ns
, current_fsgid());
2147 op_data
->op_cap
= cfs_curproc_cap_pack();
2148 op_data
->op_bias
= 0;
2149 op_data
->op_cli_flags
= 0;
2150 if ((opc
== LUSTRE_OPC_CREATE
) && name
&&
2151 filename_is_volatile(name
, namelen
, NULL
))
2152 op_data
->op_bias
|= MDS_CREATE_VOLATILE
;
2153 op_data
->op_opc
= opc
;
2154 op_data
->op_mds
= 0;
2155 op_data
->op_data
= data
;
2157 /* If the file is being opened after mknod() (normally due to NFS)
2158 * try to use the default stripe data from parent directory for
2159 * allocating OST objects. Try to pass the parent FID to MDS.
2161 if (opc
== LUSTRE_OPC_CREATE
&& i1
== i2
&& S_ISREG(i2
->i_mode
) &&
2162 !ll_i2info(i2
)->lli_has_smd
) {
2163 struct ll_inode_info
*lli
= ll_i2info(i2
);
2165 spin_lock(&lli
->lli_lock
);
2166 if (likely(!lli
->lli_has_smd
&& !fid_is_zero(&lli
->lli_pfid
)))
2167 op_data
->op_fid1
= lli
->lli_pfid
;
2168 spin_unlock(&lli
->lli_lock
);
2171 /* When called by ll_setattr_raw, file is i1. */
2172 if (ll_i2info(i1
)->lli_flags
& LLIF_DATA_MODIFIED
)
2173 op_data
->op_bias
|= MDS_DATA_MODIFIED
;
/*
 * Release an md_op_data obtained with kzalloc(), either directly or via
 * ll_prep_md_op_data().
 *
 * \param[in] op_data	structure to free
 */
void ll_finish_md_op_data(struct md_op_data *op_data)
{
	kfree(op_data);
}
2183 int ll_show_options(struct seq_file
*seq
, struct dentry
*dentry
)
2185 struct ll_sb_info
*sbi
;
2187 LASSERT(seq
&& dentry
);
2188 sbi
= ll_s2sbi(dentry
->d_sb
);
2190 if (sbi
->ll_flags
& LL_SBI_NOLCK
)
2191 seq_puts(seq
, ",nolock");
2193 if (sbi
->ll_flags
& LL_SBI_FLOCK
)
2194 seq_puts(seq
, ",flock");
2196 if (sbi
->ll_flags
& LL_SBI_LOCALFLOCK
)
2197 seq_puts(seq
, ",localflock");
2199 if (sbi
->ll_flags
& LL_SBI_USER_XATTR
)
2200 seq_puts(seq
, ",user_xattr");
2202 if (sbi
->ll_flags
& LL_SBI_LAZYSTATFS
)
2203 seq_puts(seq
, ",lazystatfs");
2205 if (sbi
->ll_flags
& LL_SBI_USER_FID2PATH
)
2206 seq_puts(seq
, ",user_fid2path");
2212 * Get obd name by cmd, and copy out to user space
2214 int ll_get_obd_name(struct inode
*inode
, unsigned int cmd
, unsigned long arg
)
2216 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2217 struct obd_device
*obd
;
2219 if (cmd
== OBD_IOC_GETDTNAME
)
2220 obd
= class_exp2obd(sbi
->ll_dt_exp
);
2221 else if (cmd
== OBD_IOC_GETMDNAME
)
2222 obd
= class_exp2obd(sbi
->ll_md_exp
);
2229 if (copy_to_user((void __user
*)arg
, obd
->obd_name
,
2230 strlen(obd
->obd_name
) + 1))
2237 * Get lustre file system name by \a sbi. If \a buf is provided(non-NULL), the
2238 * fsname will be returned in this buffer; otherwise, a static buffer will be
2239 * used to store the fsname and returned to caller.
2241 char *ll_get_fsname(struct super_block
*sb
, char *buf
, int buflen
)
2243 static char fsname_static
[MTI_NAME_MAXLEN
];
2244 struct lustre_sb_info
*lsi
= s2lsi(sb
);
2249 /* this means the caller wants to use static buffer
2250 * and it doesn't care about race. Usually this is
2251 * in error reporting path
2253 buf
= fsname_static
;
2254 buflen
= sizeof(fsname_static
);
2257 len
= strlen(lsi
->lsi_lmd
->lmd_profile
);
2258 ptr
= strrchr(lsi
->lsi_lmd
->lmd_profile
, '-');
2259 if (ptr
&& (strcmp(ptr
, "-client") == 0))
2262 if (unlikely(len
>= buflen
))
2264 strncpy(buf
, lsi
->lsi_lmd
->lmd_profile
, len
);
2270 void ll_dirty_page_discard_warn(struct page
*page
, int ioret
)
2272 char *buf
, *path
= NULL
;
2273 struct dentry
*dentry
= NULL
;
2274 struct ccc_object
*obj
= cl_inode2ccc(page
->mapping
->host
);
2276 /* this can be called inside spin lock so use GFP_ATOMIC. */
2277 buf
= (char *)__get_free_page(GFP_ATOMIC
);
2279 dentry
= d_find_alias(page
->mapping
->host
);
2281 path
= dentry_path_raw(dentry
, buf
, PAGE_SIZE
);
2285 "%s: dirty page discard: %s/fid: " DFID
"/%s may get corrupted (rc %d)\n",
2286 ll_get_fsname(page
->mapping
->host
->i_sb
, NULL
, 0),
2287 s2lsi(page
->mapping
->host
->i_sb
)->lsi_lmd
->lmd_dev
,
2288 PFID(&obj
->cob_header
.coh_lu
.loh_fid
),
2289 (path
&& !IS_ERR(path
)) ? path
: "", ioret
);
2295 free_page((unsigned long)buf
);