/*
 * Copyright 2012 Xyratex Technology Limited
 *
 * Copyright (c) 2013, 2015, Intel Corporation.
 *
 * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
 *
 */
10 #define DEBUG_SUBSYSTEM S_LLITE
13 #include <linux/sched.h>
15 #include "../include/obd_support.h"
16 #include "../include/lustre_lite.h"
17 #include "../include/lustre_dlm.h"
18 #include "../include/lustre_ver.h"
19 #include "llite_internal.h"
21 /* If we ever have hundreds of extended attributes, we might want to consider
22 * using a hash or a tree structure instead of list for faster lookups.
24 struct ll_xattr_entry
{
25 struct list_head xe_list
; /* protected with
26 * lli_xattrs_list_rwsem
28 char *xe_name
; /* xattr name, \0-terminated */
29 char *xe_value
; /* xattr value */
30 unsigned xe_namelen
; /* strlen(xe_name) + 1 */
31 unsigned xe_vallen
; /* xattr value length */
34 static struct kmem_cache
*xattr_kmem
;
35 static struct lu_kmem_descr xattr_caches
[] = {
37 .ckd_cache
= &xattr_kmem
,
38 .ckd_name
= "xattr_kmem",
39 .ckd_size
= sizeof(struct ll_xattr_entry
)
46 int ll_xattr_init(void)
48 return lu_kmem_init(xattr_caches
);
51 void ll_xattr_fini(void)
53 lu_kmem_fini(xattr_caches
);
57 * Initializes xattr cache for an inode.
59 * This initializes the xattr list and marks cache presence.
61 static void ll_xattr_cache_init(struct ll_inode_info
*lli
)
63 INIT_LIST_HEAD(&lli
->lli_xattrs
);
64 lli
->lli_flags
|= LLIF_XATTR_CACHE
;
68 * This looks for a specific extended attribute.
70 * Find in @cache and return @xattr_name attribute in @xattr,
71 * for the NULL @xattr_name return the first cached @xattr.
74 * \retval -ENODATA if not found
76 static int ll_xattr_cache_find(struct list_head
*cache
,
77 const char *xattr_name
,
78 struct ll_xattr_entry
**xattr
)
80 struct ll_xattr_entry
*entry
;
82 list_for_each_entry(entry
, cache
, xe_list
) {
83 /* xattr_name == NULL means look for any entry */
84 if (!xattr_name
|| strcmp(xattr_name
, entry
->xe_name
) == 0) {
86 CDEBUG(D_CACHE
, "find: [%s]=%.*s\n",
87 entry
->xe_name
, entry
->xe_vallen
,
99 * Add @xattr_name attr with @xattr_val value and @xattr_val_len length,
102 * \retval -ENOMEM if no memory could be allocated for the cached attr
103 * \retval -EPROTO if duplicate xattr is being added
105 static int ll_xattr_cache_add(struct list_head
*cache
,
106 const char *xattr_name
,
107 const char *xattr_val
,
108 unsigned xattr_val_len
)
110 struct ll_xattr_entry
*xattr
;
112 if (ll_xattr_cache_find(cache
, xattr_name
, &xattr
) == 0) {
113 CDEBUG(D_CACHE
, "duplicate xattr: [%s]\n", xattr_name
);
117 xattr
= kmem_cache_zalloc(xattr_kmem
, GFP_NOFS
);
119 CDEBUG(D_CACHE
, "failed to allocate xattr\n");
123 xattr
->xe_name
= kstrdup(xattr_name
, GFP_NOFS
);
124 if (!xattr
->xe_name
) {
125 CDEBUG(D_CACHE
, "failed to alloc xattr name %u\n",
129 xattr
->xe_value
= kmemdup(xattr_val
, xattr_val_len
, GFP_NOFS
);
130 if (!xattr
->xe_value
)
133 xattr
->xe_vallen
= xattr_val_len
;
134 list_add(&xattr
->xe_list
, cache
);
136 CDEBUG(D_CACHE
, "set: [%s]=%.*s\n", xattr_name
, xattr_val_len
,
141 kfree(xattr
->xe_name
);
143 kmem_cache_free(xattr_kmem
, xattr
);
149 * This removes an extended attribute from cache.
151 * Remove @xattr_name attribute from @cache.
154 * \retval -ENODATA if @xattr_name is not cached
156 static int ll_xattr_cache_del(struct list_head
*cache
,
157 const char *xattr_name
)
159 struct ll_xattr_entry
*xattr
;
161 CDEBUG(D_CACHE
, "del xattr: %s\n", xattr_name
);
163 if (ll_xattr_cache_find(cache
, xattr_name
, &xattr
) == 0) {
164 list_del(&xattr
->xe_list
);
165 kfree(xattr
->xe_name
);
166 kfree(xattr
->xe_value
);
167 kmem_cache_free(xattr_kmem
, xattr
);
176 * This iterates cached extended attributes.
178 * Walk over cached attributes in @cache and
179 * fill in @xld_buffer or only calculate buffer
180 * size if @xld_buffer is NULL.
182 * \retval >= 0 buffer list size
183 * \retval -ENODATA if the list cannot fit @xld_size buffer
185 static int ll_xattr_cache_list(struct list_head
*cache
,
189 struct ll_xattr_entry
*xattr
, *tmp
;
192 list_for_each_entry_safe(xattr
, tmp
, cache
, xe_list
) {
193 CDEBUG(D_CACHE
, "list: buffer=%p[%d] name=%s\n",
194 xld_buffer
, xld_tail
, xattr
->xe_name
);
197 xld_size
-= xattr
->xe_namelen
;
200 memcpy(&xld_buffer
[xld_tail
],
201 xattr
->xe_name
, xattr
->xe_namelen
);
203 xld_tail
+= xattr
->xe_namelen
;
213 * Check if the xattr cache is initialized (filled).
215 * \retval 0 @cache is not initialized
216 * \retval 1 @cache is initialized
218 static int ll_xattr_cache_valid(struct ll_inode_info
*lli
)
220 return !!(lli
->lli_flags
& LLIF_XATTR_CACHE
);
224 * This finalizes the xattr cache.
226 * Free all xattr memory. @lli is the inode info pointer.
228 * \retval 0 no error occurred
230 static int ll_xattr_cache_destroy_locked(struct ll_inode_info
*lli
)
232 if (!ll_xattr_cache_valid(lli
))
235 while (ll_xattr_cache_del(&lli
->lli_xattrs
, NULL
) == 0)
237 lli
->lli_flags
&= ~LLIF_XATTR_CACHE
;
242 int ll_xattr_cache_destroy(struct inode
*inode
)
244 struct ll_inode_info
*lli
= ll_i2info(inode
);
247 down_write(&lli
->lli_xattrs_list_rwsem
);
248 rc
= ll_xattr_cache_destroy_locked(lli
);
249 up_write(&lli
->lli_xattrs_list_rwsem
);
255 * Match or enqueue a PR lock.
257 * Find or request an LDLM lock with xattr data.
258 * Since LDLM does not provide API for atomic match_or_enqueue,
259 * the function handles it with a separate enq lock.
260 * If successful, the function exits with the list lock held.
262 * \retval 0 no error occurred
263 * \retval -ENOMEM not enough memory
265 static int ll_xattr_find_get_lock(struct inode
*inode
,
266 struct lookup_intent
*oit
,
267 struct ptlrpc_request
**req
)
270 struct lustre_handle lockh
= { 0 };
271 struct md_op_data
*op_data
;
272 struct ll_inode_info
*lli
= ll_i2info(inode
);
273 struct ldlm_enqueue_info einfo
= {
274 .ei_type
= LDLM_IBITS
,
275 .ei_mode
= it_to_lock_mode(oit
),
276 .ei_cb_bl
= &ll_md_blocking_ast
,
277 .ei_cb_cp
= &ldlm_completion_ast
,
279 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
280 struct obd_export
*exp
= sbi
->ll_md_exp
;
283 mutex_lock(&lli
->lli_xattrs_enq_lock
);
284 /* inode may have been shrunk and recreated, so data is gone, match lock
285 * only when data exists.
287 if (ll_xattr_cache_valid(lli
)) {
288 /* Try matching first. */
289 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_XATTR
, &lockh
, 0,
292 /* fake oit in mdc_revalidate_lock() manner */
293 oit
->it_lock_handle
= lockh
.cookie
;
294 oit
->it_lock_mode
= mode
;
299 /* Enqueue if the lock isn't cached locally. */
300 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
301 LUSTRE_OPC_ANY
, NULL
);
302 if (IS_ERR(op_data
)) {
303 mutex_unlock(&lli
->lli_xattrs_enq_lock
);
304 return PTR_ERR(op_data
);
307 op_data
->op_valid
= OBD_MD_FLXATTR
| OBD_MD_FLXATTRLS
;
309 rc
= md_enqueue(exp
, &einfo
, NULL
, oit
, op_data
, &lockh
, 0);
310 ll_finish_md_op_data(op_data
);
314 "md_intent_lock failed with %d for fid "DFID
"\n",
315 rc
, PFID(ll_inode2fid(inode
)));
316 mutex_unlock(&lli
->lli_xattrs_enq_lock
);
320 *req
= oit
->it_request
;
322 down_write(&lli
->lli_xattrs_list_rwsem
);
323 mutex_unlock(&lli
->lli_xattrs_enq_lock
);
329 * Refill the xattr cache.
331 * Fetch and cache the whole of xattrs for @inode, acquiring
332 * a read or a write xattr lock depending on operation in @oit.
333 * Intent is dropped on exit unless the operation is setxattr.
335 * \retval 0 no error occurred
336 * \retval -EPROTO network protocol error
337 * \retval -ENOMEM not enough memory for the cache
339 static int ll_xattr_cache_refill(struct inode
*inode
, struct lookup_intent
*oit
)
341 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
342 struct ptlrpc_request
*req
= NULL
;
343 const char *xdata
, *xval
, *xtail
, *xvtail
;
344 struct ll_inode_info
*lli
= ll_i2info(inode
);
345 struct mdt_body
*body
;
349 rc
= ll_xattr_find_get_lock(inode
, oit
, &req
);
353 /* Do we have the data at this point? */
354 if (ll_xattr_cache_valid(lli
)) {
355 ll_stats_ops_tally(sbi
, LPROC_LL_GETXATTR_HITS
, 1);
360 /* Matched but no cache? Cancelled on error by a parallel refill. */
361 if (unlikely(!req
)) {
362 CDEBUG(D_CACHE
, "cancelled by a parallel getxattr\n");
367 if (oit
->it_status
< 0) {
368 CDEBUG(D_CACHE
, "getxattr intent returned %d for fid "DFID
"\n",
369 oit
->it_status
, PFID(ll_inode2fid(inode
)));
371 /* xattr data is so large that we don't want to cache it */
377 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
379 CERROR("no MDT BODY in the refill xattr reply\n");
383 /* do not need swab xattr data */
384 xdata
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EADATA
,
385 body
->mbo_eadatasize
);
386 xval
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EAVALS
,
388 xsizes
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EAVALS_LENS
,
389 body
->mbo_max_mdsize
* sizeof(__u32
));
390 if (!xdata
|| !xval
|| !xsizes
) {
391 CERROR("wrong setxattr reply\n");
396 xtail
= xdata
+ body
->mbo_eadatasize
;
397 xvtail
= xval
+ body
->mbo_aclsize
;
399 CDEBUG(D_CACHE
, "caching: xdata=%p xtail=%p\n", xdata
, xtail
);
401 ll_xattr_cache_init(lli
);
403 for (i
= 0; i
< body
->mbo_max_mdsize
; i
++) {
404 CDEBUG(D_CACHE
, "caching [%s]=%.*s\n", xdata
, *xsizes
, xval
);
405 /* Perform consistency checks: attr names and vals in pill */
406 if (!memchr(xdata
, 0, xtail
- xdata
)) {
407 CERROR("xattr protocol violation (names are broken)\n");
409 } else if (xval
+ *xsizes
> xvtail
) {
410 CERROR("xattr protocol violation (vals are broken)\n");
412 } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM
)) {
414 } else if (!strcmp(xdata
, XATTR_NAME_ACL_ACCESS
)) {
415 /* Filter out ACL ACCESS since it's cached separately */
416 CDEBUG(D_CACHE
, "not caching %s\n",
417 XATTR_NAME_ACL_ACCESS
);
420 rc
= ll_xattr_cache_add(&lli
->lli_xattrs
, xdata
, xval
,
424 ll_xattr_cache_destroy_locked(lli
);
427 xdata
+= strlen(xdata
) + 1;
432 if (xdata
!= xtail
|| xval
!= xvtail
)
433 CERROR("a hole in xattr data\n");
435 ll_set_lock_data(sbi
->ll_md_exp
, inode
, oit
, NULL
);
440 ll_intent_drop_lock(oit
);
443 up_write(&lli
->lli_xattrs_list_rwsem
);
445 ptlrpc_req_finished(req
);
450 up_write(&lli
->lli_xattrs_list_rwsem
);
452 ldlm_lock_decref_and_cancel((struct lustre_handle
*)
453 &oit
->it_lock_handle
,
460 * Get an xattr value or list xattrs using the write-through cache.
462 * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or
463 * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode.
464 * The resulting value/list is stored in @buffer if the former
465 * is not larger than @size.
467 * \retval 0 no error occurred
468 * \retval -EPROTO network protocol error
469 * \retval -ENOMEM not enough memory for the cache
470 * \retval -ERANGE the buffer is not large enough
471 * \retval -ENODATA no such attr or the list is empty
473 int ll_xattr_cache_get(struct inode
*inode
, const char *name
, char *buffer
,
474 size_t size
, __u64 valid
)
476 struct lookup_intent oit
= { .it_op
= IT_GETXATTR
};
477 struct ll_inode_info
*lli
= ll_i2info(inode
);
480 LASSERT(!!(valid
& OBD_MD_FLXATTR
) ^ !!(valid
& OBD_MD_FLXATTRLS
));
482 down_read(&lli
->lli_xattrs_list_rwsem
);
483 if (!ll_xattr_cache_valid(lli
)) {
484 up_read(&lli
->lli_xattrs_list_rwsem
);
485 rc
= ll_xattr_cache_refill(inode
, &oit
);
488 downgrade_write(&lli
->lli_xattrs_list_rwsem
);
490 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_GETXATTR_HITS
, 1);
493 if (valid
& OBD_MD_FLXATTR
) {
494 struct ll_xattr_entry
*xattr
;
496 rc
= ll_xattr_cache_find(&lli
->lli_xattrs
, name
, &xattr
);
498 rc
= xattr
->xe_vallen
;
499 /* zero size means we are only requested size in rc */
501 if (size
>= xattr
->xe_vallen
)
502 memcpy(buffer
, xattr
->xe_value
,
508 } else if (valid
& OBD_MD_FLXATTRLS
) {
509 rc
= ll_xattr_cache_list(&lli
->lli_xattrs
,
510 size
? buffer
: NULL
, size
);
515 up_read(&lli
->lli_xattrs_list_rwsem
);