2 * pNFS functions to call and manage layout drivers.
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
8 * Dean Hildebrand <dhildebz@umich.edu>
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
30 #include <linux/nfs_fs.h>
34 #define NFSDBG_FACILITY NFSDBG_PNFS
39 * protects pnfs_modules_tbl.
41 static DEFINE_SPINLOCK(pnfs_spinlock
);
44 * pnfs_modules_tbl holds all pnfs modules
46 static LIST_HEAD(pnfs_modules_tbl
);
48 /* Return the registered pnfs layout driver module matching given id */
49 static struct pnfs_layoutdriver_type
*
50 find_pnfs_driver_locked(u32 id
)
52 struct pnfs_layoutdriver_type
*local
;
54 list_for_each_entry(local
, &pnfs_modules_tbl
, pnfs_tblid
)
59 dprintk("%s: Searching for id %u, found %p\n", __func__
, id
, local
);
63 static struct pnfs_layoutdriver_type
*
64 find_pnfs_driver(u32 id
)
66 struct pnfs_layoutdriver_type
*local
;
68 spin_lock(&pnfs_spinlock
);
69 local
= find_pnfs_driver_locked(id
);
70 spin_unlock(&pnfs_spinlock
);
75 unset_pnfs_layoutdriver(struct nfs_server
*nfss
)
77 if (nfss
->pnfs_curr_ld
) {
78 nfss
->pnfs_curr_ld
->clear_layoutdriver(nfss
);
79 module_put(nfss
->pnfs_curr_ld
->owner
);
81 nfss
->pnfs_curr_ld
= NULL
;
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
91 set_pnfs_layoutdriver(struct nfs_server
*server
, u32 id
)
93 struct pnfs_layoutdriver_type
*ld_type
= NULL
;
97 if (!(server
->nfs_client
->cl_exchange_flags
&
98 (EXCHGID4_FLAG_USE_NON_PNFS
| EXCHGID4_FLAG_USE_PNFS_MDS
))) {
99 printk(KERN_ERR
"%s: id %u cl_exchange_flags 0x%x\n", __func__
,
100 id
, server
->nfs_client
->cl_exchange_flags
);
103 ld_type
= find_pnfs_driver(id
);
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX
, id
);
106 ld_type
= find_pnfs_driver(id
);
108 dprintk("%s: No pNFS module found for %u.\n",
113 if (!try_module_get(ld_type
->owner
)) {
114 dprintk("%s: Could not grab reference on module\n", __func__
);
117 server
->pnfs_curr_ld
= ld_type
;
118 if (ld_type
->set_layoutdriver(server
)) {
120 "%s: Error initializing mount point for layout driver %u.\n",
122 module_put(ld_type
->owner
);
125 dprintk("%s: pNFS module for %u set\n", __func__
, id
);
129 dprintk("%s: Using NFSv4 I/O\n", __func__
);
130 server
->pnfs_curr_ld
= NULL
;
134 pnfs_register_layoutdriver(struct pnfs_layoutdriver_type
*ld_type
)
136 int status
= -EINVAL
;
137 struct pnfs_layoutdriver_type
*tmp
;
139 if (ld_type
->id
== 0) {
140 printk(KERN_ERR
"%s id 0 is reserved\n", __func__
);
143 if (!ld_type
->alloc_lseg
|| !ld_type
->free_lseg
) {
144 printk(KERN_ERR
"%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__
);
149 spin_lock(&pnfs_spinlock
);
150 tmp
= find_pnfs_driver_locked(ld_type
->id
);
152 list_add(&ld_type
->pnfs_tblid
, &pnfs_modules_tbl
);
154 dprintk("%s Registering id:%u name:%s\n", __func__
, ld_type
->id
,
157 printk(KERN_ERR
"%s Module with id %d already loaded!\n",
158 __func__
, ld_type
->id
);
160 spin_unlock(&pnfs_spinlock
);
164 EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver
);
167 pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type
*ld_type
)
169 dprintk("%s Deregistering id:%u\n", __func__
, ld_type
->id
);
170 spin_lock(&pnfs_spinlock
);
171 list_del(&ld_type
->pnfs_tblid
);
172 spin_unlock(&pnfs_spinlock
);
174 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver
);
177 * pNFS client layout cache
180 /* Need to hold i_lock if caller does not already hold reference */
182 get_layout_hdr(struct pnfs_layout_hdr
*lo
)
184 atomic_inc(&lo
->plh_refcount
);
188 destroy_layout_hdr(struct pnfs_layout_hdr
*lo
)
190 dprintk("%s: freeing layout cache %p\n", __func__
, lo
);
191 BUG_ON(!list_empty(&lo
->plh_layouts
));
192 NFS_I(lo
->plh_inode
)->layout
= NULL
;
197 put_layout_hdr_locked(struct pnfs_layout_hdr
*lo
)
199 if (atomic_dec_and_test(&lo
->plh_refcount
))
200 destroy_layout_hdr(lo
);
204 put_layout_hdr(struct pnfs_layout_hdr
*lo
)
206 struct inode
*inode
= lo
->plh_inode
;
208 if (atomic_dec_and_lock(&lo
->plh_refcount
, &inode
->i_lock
)) {
209 destroy_layout_hdr(lo
);
210 spin_unlock(&inode
->i_lock
);
215 init_lseg(struct pnfs_layout_hdr
*lo
, struct pnfs_layout_segment
*lseg
)
217 INIT_LIST_HEAD(&lseg
->pls_list
);
218 atomic_set(&lseg
->pls_refcount
, 1);
220 set_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
);
221 lseg
->pls_layout
= lo
;
224 static void free_lseg(struct pnfs_layout_segment
*lseg
)
226 struct inode
*ino
= lseg
->pls_layout
->plh_inode
;
228 NFS_SERVER(ino
)->pnfs_curr_ld
->free_lseg(lseg
);
229 /* Matched by get_layout_hdr in pnfs_insert_layout */
230 put_layout_hdr(NFS_I(ino
)->layout
);
233 /* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
234 * could sleep, so must be called outside of the lock.
235 * Returns 1 if object was removed, otherwise return 0.
238 put_lseg_locked(struct pnfs_layout_segment
*lseg
,
239 struct list_head
*tmp_list
)
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__
, lseg
,
242 atomic_read(&lseg
->pls_refcount
),
243 test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
));
244 if (atomic_dec_and_test(&lseg
->pls_refcount
)) {
245 struct inode
*ino
= lseg
->pls_layout
->plh_inode
;
247 BUG_ON(test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
));
248 list_del(&lseg
->pls_list
);
249 if (list_empty(&lseg
->pls_layout
->plh_segs
)) {
250 set_bit(NFS_LAYOUT_DESTROYED
, &lseg
->pls_layout
->plh_flags
);
251 /* Matched by initial refcount set in alloc_init_layout_hdr */
252 put_layout_hdr_locked(lseg
->pls_layout
);
254 rpc_wake_up(&NFS_SERVER(ino
)->roc_rpcwaitq
);
255 list_add(&lseg
->pls_list
, tmp_list
);
262 should_free_lseg(u32 lseg_iomode
, u32 recall_iomode
)
264 return (recall_iomode
== IOMODE_ANY
||
265 lseg_iomode
== recall_iomode
);
268 /* Returns 1 if lseg is removed from list, 0 otherwise */
269 static int mark_lseg_invalid(struct pnfs_layout_segment
*lseg
,
270 struct list_head
*tmp_list
)
274 if (test_and_clear_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
)) {
275 /* Remove the reference keeping the lseg in the
276 * list. It will now be removed when all
277 * outstanding io is finished.
279 rv
= put_lseg_locked(lseg
, tmp_list
);
284 /* Returns the number of matching invalid lsegs remaining in the list
288 mark_matching_lsegs_invalid(struct pnfs_layout_hdr
*lo
,
289 struct list_head
*tmp_list
,
292 struct pnfs_layout_segment
*lseg
, *next
;
293 int invalid
= 0, removed
= 0;
295 dprintk("%s:Begin lo %p\n", __func__
, lo
);
297 if (list_empty(&lo
->plh_segs
)) {
298 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED
, &lo
->plh_flags
))
299 put_layout_hdr_locked(lo
);
302 list_for_each_entry_safe(lseg
, next
, &lo
->plh_segs
, pls_list
)
303 if (should_free_lseg(lseg
->pls_range
.iomode
, iomode
)) {
304 dprintk("%s: freeing lseg %p iomode %d "
305 "offset %llu length %llu\n", __func__
,
306 lseg
, lseg
->pls_range
.iomode
, lseg
->pls_range
.offset
,
307 lseg
->pls_range
.length
);
309 removed
+= mark_lseg_invalid(lseg
, tmp_list
);
311 dprintk("%s:Return %i\n", __func__
, invalid
- removed
);
312 return invalid
- removed
;
315 /* note free_me must contain lsegs from a single layout_hdr */
317 pnfs_free_lseg_list(struct list_head
*free_me
)
319 struct pnfs_layout_segment
*lseg
, *tmp
;
320 struct pnfs_layout_hdr
*lo
;
322 if (list_empty(free_me
))
325 lo
= list_first_entry(free_me
, struct pnfs_layout_segment
,
326 pls_list
)->pls_layout
;
328 if (test_bit(NFS_LAYOUT_DESTROYED
, &lo
->plh_flags
)) {
329 struct nfs_client
*clp
;
331 clp
= NFS_SERVER(lo
->plh_inode
)->nfs_client
;
332 spin_lock(&clp
->cl_lock
);
333 list_del_init(&lo
->plh_layouts
);
334 spin_unlock(&clp
->cl_lock
);
336 list_for_each_entry_safe(lseg
, tmp
, free_me
, pls_list
) {
337 list_del(&lseg
->pls_list
);
343 pnfs_destroy_layout(struct nfs_inode
*nfsi
)
345 struct pnfs_layout_hdr
*lo
;
348 spin_lock(&nfsi
->vfs_inode
.i_lock
);
351 lo
->plh_block_lgets
++; /* permanently block new LAYOUTGETs */
352 mark_matching_lsegs_invalid(lo
, &tmp_list
, IOMODE_ANY
);
354 spin_unlock(&nfsi
->vfs_inode
.i_lock
);
355 pnfs_free_lseg_list(&tmp_list
);
359 * Called by the state manager to remove all layouts established under an
363 pnfs_destroy_all_layouts(struct nfs_client
*clp
)
365 struct pnfs_layout_hdr
*lo
;
368 spin_lock(&clp
->cl_lock
);
369 list_splice_init(&clp
->cl_layouts
, &tmp_list
);
370 spin_unlock(&clp
->cl_lock
);
372 while (!list_empty(&tmp_list
)) {
373 lo
= list_entry(tmp_list
.next
, struct pnfs_layout_hdr
,
375 dprintk("%s freeing layout for inode %lu\n", __func__
,
376 lo
->plh_inode
->i_ino
);
377 pnfs_destroy_layout(NFS_I(lo
->plh_inode
));
381 /* update lo->plh_stateid with new if its seqid is more recent */
383 pnfs_set_layout_stateid(struct pnfs_layout_hdr
*lo
, const nfs4_stateid
*new,
388 oldseq
= be32_to_cpu(lo
->plh_stateid
.stateid
.seqid
);
389 newseq
= be32_to_cpu(new->stateid
.seqid
);
390 if ((int)(newseq
- oldseq
) > 0) {
391 memcpy(&lo
->plh_stateid
, &new->stateid
, sizeof(new->stateid
));
392 if (update_barrier
) {
393 u32 new_barrier
= be32_to_cpu(new->stateid
.seqid
);
395 if ((int)(new_barrier
- lo
->plh_barrier
))
396 lo
->plh_barrier
= new_barrier
;
398 /* Because of wraparound, we want to keep the barrier
399 * "close" to the current seqids. It needs to be
400 * within 2**31 to count as "behind", so if it
401 * gets too near that limit, give us a little leeway
402 * and bring it to within 2**30.
403 * NOTE - and yes, this is all unsigned arithmetic.
405 if (unlikely((newseq
- lo
->plh_barrier
) > (3 << 29)))
406 lo
->plh_barrier
= newseq
- (1 << 30);
411 /* lget is set to 1 if called from inside send_layoutget call chain */
413 pnfs_layoutgets_blocked(struct pnfs_layout_hdr
*lo
, nfs4_stateid
*stateid
,
417 (int)(lo
->plh_barrier
- be32_to_cpu(stateid
->stateid
.seqid
)) >= 0)
419 return lo
->plh_block_lgets
||
420 test_bit(NFS_LAYOUT_DESTROYED
, &lo
->plh_flags
) ||
421 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
) ||
422 (list_empty(&lo
->plh_segs
) &&
423 (atomic_read(&lo
->plh_outstanding
) > lget
));
427 pnfs_choose_layoutget_stateid(nfs4_stateid
*dst
, struct pnfs_layout_hdr
*lo
,
428 struct nfs4_state
*open_state
)
432 dprintk("--> %s\n", __func__
);
433 spin_lock(&lo
->plh_inode
->i_lock
);
434 if (pnfs_layoutgets_blocked(lo
, NULL
, 1)) {
436 } else if (list_empty(&lo
->plh_segs
)) {
440 seq
= read_seqbegin(&open_state
->seqlock
);
441 memcpy(dst
->data
, open_state
->stateid
.data
,
442 sizeof(open_state
->stateid
.data
));
443 } while (read_seqretry(&open_state
->seqlock
, seq
));
445 memcpy(dst
->data
, lo
->plh_stateid
.data
, sizeof(lo
->plh_stateid
.data
));
446 spin_unlock(&lo
->plh_inode
->i_lock
);
447 dprintk("<-- %s\n", __func__
);
452 * Get layout from server.
453 * for now, assume that whole file layouts are requested.
455 * arg->length: all ones
457 static struct pnfs_layout_segment
*
458 send_layoutget(struct pnfs_layout_hdr
*lo
,
459 struct nfs_open_context
*ctx
,
462 struct inode
*ino
= lo
->plh_inode
;
463 struct nfs_server
*server
= NFS_SERVER(ino
);
464 struct nfs4_layoutget
*lgp
;
465 struct pnfs_layout_segment
*lseg
= NULL
;
467 dprintk("--> %s\n", __func__
);
470 lgp
= kzalloc(sizeof(*lgp
), GFP_KERNEL
);
473 lgp
->args
.minlength
= NFS4_MAX_UINT64
;
474 lgp
->args
.maxcount
= PNFS_LAYOUT_MAXSIZE
;
475 lgp
->args
.range
.iomode
= iomode
;
476 lgp
->args
.range
.offset
= 0;
477 lgp
->args
.range
.length
= NFS4_MAX_UINT64
;
478 lgp
->args
.type
= server
->pnfs_curr_ld
->id
;
479 lgp
->args
.inode
= ino
;
480 lgp
->args
.ctx
= get_nfs_open_context(ctx
);
483 /* Synchronously retrieve layout information from server and
486 nfs4_proc_layoutget(lgp
);
488 /* remember that LAYOUTGET failed and suspend trying */
489 set_bit(lo_fail_bit(iomode
), &lo
->plh_flags
);
494 bool pnfs_roc(struct inode
*ino
)
496 struct pnfs_layout_hdr
*lo
;
497 struct pnfs_layout_segment
*lseg
, *tmp
;
501 spin_lock(&ino
->i_lock
);
502 lo
= NFS_I(ino
)->layout
;
503 if (!lo
|| !test_and_clear_bit(NFS_LAYOUT_ROC
, &lo
->plh_flags
) ||
504 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
))
506 list_for_each_entry_safe(lseg
, tmp
, &lo
->plh_segs
, pls_list
)
507 if (test_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
)) {
508 mark_lseg_invalid(lseg
, &tmp_list
);
513 lo
->plh_block_lgets
++;
514 get_layout_hdr(lo
); /* matched in pnfs_roc_release */
515 spin_unlock(&ino
->i_lock
);
516 pnfs_free_lseg_list(&tmp_list
);
520 spin_unlock(&ino
->i_lock
);
524 void pnfs_roc_release(struct inode
*ino
)
526 struct pnfs_layout_hdr
*lo
;
528 spin_lock(&ino
->i_lock
);
529 lo
= NFS_I(ino
)->layout
;
530 lo
->plh_block_lgets
--;
531 put_layout_hdr_locked(lo
);
532 spin_unlock(&ino
->i_lock
);
535 void pnfs_roc_set_barrier(struct inode
*ino
, u32 barrier
)
537 struct pnfs_layout_hdr
*lo
;
539 spin_lock(&ino
->i_lock
);
540 lo
= NFS_I(ino
)->layout
;
541 if ((int)(barrier
- lo
->plh_barrier
) > 0)
542 lo
->plh_barrier
= barrier
;
543 spin_unlock(&ino
->i_lock
);
546 bool pnfs_roc_drain(struct inode
*ino
, u32
*barrier
)
548 struct nfs_inode
*nfsi
= NFS_I(ino
);
549 struct pnfs_layout_segment
*lseg
;
552 spin_lock(&ino
->i_lock
);
553 list_for_each_entry(lseg
, &nfsi
->layout
->plh_segs
, pls_list
)
554 if (test_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
)) {
559 struct pnfs_layout_hdr
*lo
= nfsi
->layout
;
560 u32 current_seqid
= be32_to_cpu(lo
->plh_stateid
.stateid
.seqid
);
562 /* Since close does not return a layout stateid for use as
563 * a barrier, we choose the worst-case barrier.
565 *barrier
= current_seqid
+ atomic_read(&lo
->plh_outstanding
);
567 spin_unlock(&ino
->i_lock
);
572 * Compare two layout segments for sorting into layout cache.
573 * We want to preferentially return RW over RO layouts, so ensure those
577 cmp_layout(u32 iomode1
, u32 iomode2
)
579 /* read > read/write */
580 return (int)(iomode2
== IOMODE_READ
) - (int)(iomode1
== IOMODE_READ
);
584 pnfs_insert_layout(struct pnfs_layout_hdr
*lo
,
585 struct pnfs_layout_segment
*lseg
)
587 struct pnfs_layout_segment
*lp
;
590 dprintk("%s:Begin\n", __func__
);
592 assert_spin_locked(&lo
->plh_inode
->i_lock
);
593 list_for_each_entry(lp
, &lo
->plh_segs
, pls_list
) {
594 if (cmp_layout(lp
->pls_range
.iomode
, lseg
->pls_range
.iomode
) > 0)
596 list_add_tail(&lseg
->pls_list
, &lp
->pls_list
);
597 dprintk("%s: inserted lseg %p "
598 "iomode %d offset %llu length %llu before "
599 "lp %p iomode %d offset %llu length %llu\n",
600 __func__
, lseg
, lseg
->pls_range
.iomode
,
601 lseg
->pls_range
.offset
, lseg
->pls_range
.length
,
602 lp
, lp
->pls_range
.iomode
, lp
->pls_range
.offset
,
603 lp
->pls_range
.length
);
608 list_add_tail(&lseg
->pls_list
, &lo
->plh_segs
);
609 dprintk("%s: inserted lseg %p "
610 "iomode %d offset %llu length %llu at tail\n",
611 __func__
, lseg
, lseg
->pls_range
.iomode
,
612 lseg
->pls_range
.offset
, lseg
->pls_range
.length
);
616 dprintk("%s:Return\n", __func__
);
619 static struct pnfs_layout_hdr
*
620 alloc_init_layout_hdr(struct inode
*ino
)
622 struct pnfs_layout_hdr
*lo
;
624 lo
= kzalloc(sizeof(struct pnfs_layout_hdr
), GFP_KERNEL
);
627 atomic_set(&lo
->plh_refcount
, 1);
628 INIT_LIST_HEAD(&lo
->plh_layouts
);
629 INIT_LIST_HEAD(&lo
->plh_segs
);
630 INIT_LIST_HEAD(&lo
->plh_bulk_recall
);
635 static struct pnfs_layout_hdr
*
636 pnfs_find_alloc_layout(struct inode
*ino
)
638 struct nfs_inode
*nfsi
= NFS_I(ino
);
639 struct pnfs_layout_hdr
*new = NULL
;
641 dprintk("%s Begin ino=%p layout=%p\n", __func__
, ino
, nfsi
->layout
);
643 assert_spin_locked(&ino
->i_lock
);
645 if (test_bit(NFS_LAYOUT_DESTROYED
, &nfsi
->layout
->plh_flags
))
650 spin_unlock(&ino
->i_lock
);
651 new = alloc_init_layout_hdr(ino
);
652 spin_lock(&ino
->i_lock
);
654 if (likely(nfsi
->layout
== NULL
)) /* Won the race? */
662 * iomode matching rules:
673 is_matching_lseg(struct pnfs_layout_segment
*lseg
, u32 iomode
)
675 return (iomode
!= IOMODE_RW
|| lseg
->pls_range
.iomode
== IOMODE_RW
);
679 * lookup range in layout
681 static struct pnfs_layout_segment
*
682 pnfs_find_lseg(struct pnfs_layout_hdr
*lo
, u32 iomode
)
684 struct pnfs_layout_segment
*lseg
, *ret
= NULL
;
686 dprintk("%s:Begin\n", __func__
);
688 assert_spin_locked(&lo
->plh_inode
->i_lock
);
689 list_for_each_entry(lseg
, &lo
->plh_segs
, pls_list
) {
690 if (test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
) &&
691 is_matching_lseg(lseg
, iomode
)) {
695 if (cmp_layout(iomode
, lseg
->pls_range
.iomode
) > 0)
699 dprintk("%s:Return lseg %p ref %d\n",
700 __func__
, ret
, ret
? atomic_read(&ret
->pls_refcount
) : 0);
705 * Layout segment is retrieved from the server if not cached.
706 * The appropriate layout segment is referenced and returned to the caller.
708 struct pnfs_layout_segment
*
709 pnfs_update_layout(struct inode
*ino
,
710 struct nfs_open_context
*ctx
,
711 enum pnfs_iomode iomode
)
713 struct nfs_inode
*nfsi
= NFS_I(ino
);
714 struct nfs_client
*clp
= NFS_SERVER(ino
)->nfs_client
;
715 struct pnfs_layout_hdr
*lo
;
716 struct pnfs_layout_segment
*lseg
= NULL
;
719 if (!pnfs_enabled_sb(NFS_SERVER(ino
)))
721 spin_lock(&ino
->i_lock
);
722 lo
= pnfs_find_alloc_layout(ino
);
724 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__
);
728 /* Do we even need to bother with this? */
729 if (test_bit(NFS4CLNT_LAYOUTRECALL
, &clp
->cl_state
) ||
730 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
)) {
731 dprintk("%s matches recall, use MDS\n", __func__
);
734 /* Check to see if the layout for the given range already exists */
735 lseg
= pnfs_find_lseg(lo
, iomode
);
739 /* if LAYOUTGET already failed once we don't try again */
740 if (test_bit(lo_fail_bit(iomode
), &nfsi
->layout
->plh_flags
))
743 if (pnfs_layoutgets_blocked(lo
, NULL
, 0))
745 atomic_inc(&lo
->plh_outstanding
);
748 if (list_empty(&lo
->plh_segs
))
750 spin_unlock(&ino
->i_lock
);
752 /* The lo must be on the clp list if there is any
753 * chance of a CB_LAYOUTRECALL(FILE) coming in.
755 spin_lock(&clp
->cl_lock
);
756 BUG_ON(!list_empty(&lo
->plh_layouts
));
757 list_add_tail(&lo
->plh_layouts
, &clp
->cl_layouts
);
758 spin_unlock(&clp
->cl_lock
);
761 lseg
= send_layoutget(lo
, ctx
, iomode
);
762 if (!lseg
&& first
) {
763 spin_lock(&clp
->cl_lock
);
764 list_del_init(&lo
->plh_layouts
);
765 spin_unlock(&clp
->cl_lock
);
767 atomic_dec(&lo
->plh_outstanding
);
770 dprintk("%s end, state 0x%lx lseg %p\n", __func__
,
771 nfsi
->layout
? nfsi
->layout
->plh_flags
: -1, lseg
);
774 spin_unlock(&ino
->i_lock
);
779 pnfs_layout_process(struct nfs4_layoutget
*lgp
)
781 struct pnfs_layout_hdr
*lo
= NFS_I(lgp
->args
.inode
)->layout
;
782 struct nfs4_layoutget_res
*res
= &lgp
->res
;
783 struct pnfs_layout_segment
*lseg
;
784 struct inode
*ino
= lo
->plh_inode
;
785 struct nfs_client
*clp
= NFS_SERVER(ino
)->nfs_client
;
788 /* Verify we got what we asked for.
789 * Note that because the xdr parsing only accepts a single
790 * element array, this can fail even if the server is behaving
793 if (lgp
->args
.range
.iomode
> res
->range
.iomode
||
794 res
->range
.offset
!= 0 ||
795 res
->range
.length
!= NFS4_MAX_UINT64
) {
799 /* Inject layout blob into I/O device driver */
800 lseg
= NFS_SERVER(ino
)->pnfs_curr_ld
->alloc_lseg(lo
, res
);
801 if (!lseg
|| IS_ERR(lseg
)) {
805 status
= PTR_ERR(lseg
);
806 dprintk("%s: Could not allocate layout: error %d\n",
811 spin_lock(&ino
->i_lock
);
812 if (test_bit(NFS4CLNT_LAYOUTRECALL
, &clp
->cl_state
) ||
813 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
)) {
814 dprintk("%s forget reply due to recall\n", __func__
);
815 goto out_forget_reply
;
818 if (pnfs_layoutgets_blocked(lo
, &res
->stateid
, 1)) {
819 dprintk("%s forget reply due to state\n", __func__
);
820 goto out_forget_reply
;
823 lseg
->pls_range
= res
->range
;
825 pnfs_insert_layout(lo
, lseg
);
827 if (res
->return_on_close
) {
828 set_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
);
829 set_bit(NFS_LAYOUT_ROC
, &lo
->plh_flags
);
832 /* Done processing layoutget. Set the layout stateid */
833 pnfs_set_layout_stateid(lo
, &res
->stateid
, false);
834 spin_unlock(&ino
->i_lock
);
839 spin_unlock(&ino
->i_lock
);
840 lseg
->pls_layout
= lo
;
841 NFS_SERVER(ino
)->pnfs_curr_ld
->free_lseg(lseg
);
846 * Device ID cache. Currently supports one layout type per struct nfs_client.
847 * Add layout type to the lookup key to expand to support multiple types.
850 pnfs_alloc_init_deviceid_cache(struct nfs_client
*clp
,
851 void (*free_callback
)(struct pnfs_deviceid_node
*))
853 struct pnfs_deviceid_cache
*c
;
855 c
= kzalloc(sizeof(struct pnfs_deviceid_cache
), GFP_KERNEL
);
858 spin_lock(&clp
->cl_lock
);
859 if (clp
->cl_devid_cache
!= NULL
) {
860 atomic_inc(&clp
->cl_devid_cache
->dc_ref
);
861 dprintk("%s [kref [%d]]\n", __func__
,
862 atomic_read(&clp
->cl_devid_cache
->dc_ref
));
865 /* kzalloc initializes hlists */
866 spin_lock_init(&c
->dc_lock
);
867 atomic_set(&c
->dc_ref
, 1);
868 c
->dc_free_callback
= free_callback
;
869 clp
->cl_devid_cache
= c
;
870 dprintk("%s [new]\n", __func__
);
872 spin_unlock(&clp
->cl_lock
);
875 EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache
);
878 * Called from pnfs_layoutdriver_type->free_lseg
879 * last layout segment reference frees deviceid
882 pnfs_put_deviceid(struct pnfs_deviceid_cache
*c
,
883 struct pnfs_deviceid_node
*devid
)
885 struct nfs4_deviceid
*id
= &devid
->de_id
;
886 struct pnfs_deviceid_node
*d
;
887 struct hlist_node
*n
;
888 long h
= nfs4_deviceid_hash(id
);
890 dprintk("%s [%d]\n", __func__
, atomic_read(&devid
->de_ref
));
891 if (!atomic_dec_and_lock(&devid
->de_ref
, &c
->dc_lock
))
894 hlist_for_each_entry_rcu(d
, n
, &c
->dc_deviceids
[h
], de_node
)
895 if (!memcmp(&d
->de_id
, id
, sizeof(*id
))) {
896 hlist_del_rcu(&d
->de_node
);
897 spin_unlock(&c
->dc_lock
);
899 c
->dc_free_callback(devid
);
902 spin_unlock(&c
->dc_lock
);
903 /* Why wasn't it found in the list? */
906 EXPORT_SYMBOL_GPL(pnfs_put_deviceid
);
908 /* Find and reference a deviceid */
909 struct pnfs_deviceid_node
*
910 pnfs_find_get_deviceid(struct pnfs_deviceid_cache
*c
, struct nfs4_deviceid
*id
)
912 struct pnfs_deviceid_node
*d
;
913 struct hlist_node
*n
;
914 long hash
= nfs4_deviceid_hash(id
);
916 dprintk("--> %s hash %ld\n", __func__
, hash
);
918 hlist_for_each_entry_rcu(d
, n
, &c
->dc_deviceids
[hash
], de_node
) {
919 if (!memcmp(&d
->de_id
, id
, sizeof(*id
))) {
920 if (!atomic_inc_not_zero(&d
->de_ref
)) {
932 EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid
);
935 * Add a deviceid to the cache.
936 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
938 struct pnfs_deviceid_node
*
939 pnfs_add_deviceid(struct pnfs_deviceid_cache
*c
, struct pnfs_deviceid_node
*new)
941 struct pnfs_deviceid_node
*d
;
942 long hash
= nfs4_deviceid_hash(&new->de_id
);
944 dprintk("--> %s hash %ld\n", __func__
, hash
);
945 spin_lock(&c
->dc_lock
);
946 d
= pnfs_find_get_deviceid(c
, &new->de_id
);
948 spin_unlock(&c
->dc_lock
);
949 dprintk("%s [discard]\n", __func__
);
950 c
->dc_free_callback(new);
953 INIT_HLIST_NODE(&new->de_node
);
954 atomic_set(&new->de_ref
, 1);
955 hlist_add_head_rcu(&new->de_node
, &c
->dc_deviceids
[hash
]);
956 spin_unlock(&c
->dc_lock
);
957 dprintk("%s [new]\n", __func__
);
960 EXPORT_SYMBOL_GPL(pnfs_add_deviceid
);
963 pnfs_put_deviceid_cache(struct nfs_client
*clp
)
965 struct pnfs_deviceid_cache
*local
= clp
->cl_devid_cache
;
967 dprintk("--> %s ({%d})\n", __func__
, atomic_read(&local
->dc_ref
));
968 if (atomic_dec_and_lock(&local
->dc_ref
, &clp
->cl_lock
)) {
970 /* Verify cache is empty */
971 for (i
= 0; i
< NFS4_DEVICE_ID_HASH_SIZE
; i
++)
972 BUG_ON(!hlist_empty(&local
->dc_deviceids
[i
]));
973 clp
->cl_devid_cache
= NULL
;
974 spin_unlock(&clp
->cl_lock
);
978 EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache
);