/*
 * Copyright (c) 2014 Christoph Hellwig.
 */
4 #include <linux/kmod.h>
5 #include <linux/file.h>
6 #include <linux/jhash.h>
7 #include <linux/sched.h>
8 #include <linux/sunrpc/addr.h>
13 #define NFSDDBG_FACILITY NFSDDBG_PNFS
16 struct list_head lo_perstate
;
17 struct nfs4_layout_stateid
*lo_state
;
18 struct nfsd4_layout_seg lo_seg
;
21 static struct kmem_cache
*nfs4_layout_cache
;
22 static struct kmem_cache
*nfs4_layout_stateid_cache
;
24 static struct nfsd4_callback_ops nfsd4_cb_layout_ops
;
25 static const struct lock_manager_operations nfsd4_layouts_lm_ops
;
27 const struct nfsd4_layout_ops
*nfsd4_layout_ops
[LAYOUT_TYPE_MAX
] = {
30 /* pNFS device ID to export fsid mapping */
31 #define DEVID_HASH_BITS 8
32 #define DEVID_HASH_SIZE (1 << DEVID_HASH_BITS)
33 #define DEVID_HASH_MASK (DEVID_HASH_SIZE - 1)
34 static u64 nfsd_devid_seq
= 1;
35 static struct list_head nfsd_devid_hash
[DEVID_HASH_SIZE
];
36 static DEFINE_SPINLOCK(nfsd_devid_lock
);
38 static inline u32
devid_hashfn(u64 idx
)
40 return jhash_2words(idx
, idx
>> 32, 0) & DEVID_HASH_MASK
;
44 nfsd4_alloc_devid_map(const struct svc_fh
*fhp
)
46 const struct knfsd_fh
*fh
= &fhp
->fh_handle
;
47 size_t fsid_len
= key_len(fh
->fh_fsid_type
);
48 struct nfsd4_deviceid_map
*map
, *old
;
51 map
= kzalloc(sizeof(*map
) + fsid_len
, GFP_KERNEL
);
55 map
->fsid_type
= fh
->fh_fsid_type
;
56 memcpy(&map
->fsid
, fh
->fh_fsid
, fsid_len
);
58 spin_lock(&nfsd_devid_lock
);
59 if (fhp
->fh_export
->ex_devid_map
)
62 for (i
= 0; i
< DEVID_HASH_SIZE
; i
++) {
63 list_for_each_entry(old
, &nfsd_devid_hash
[i
], hash
) {
64 if (old
->fsid_type
!= fh
->fh_fsid_type
)
66 if (memcmp(old
->fsid
, fh
->fh_fsid
,
67 key_len(old
->fsid_type
)))
70 fhp
->fh_export
->ex_devid_map
= old
;
75 map
->idx
= nfsd_devid_seq
++;
76 list_add_tail_rcu(&map
->hash
, &nfsd_devid_hash
[devid_hashfn(map
->idx
)]);
77 fhp
->fh_export
->ex_devid_map
= map
;
81 spin_unlock(&nfsd_devid_lock
);
85 struct nfsd4_deviceid_map
*
86 nfsd4_find_devid_map(int idx
)
88 struct nfsd4_deviceid_map
*map
, *ret
= NULL
;
91 list_for_each_entry_rcu(map
, &nfsd_devid_hash
[devid_hashfn(idx
)], hash
)
100 nfsd4_set_deviceid(struct nfsd4_deviceid
*id
, const struct svc_fh
*fhp
,
101 u32 device_generation
)
103 if (!fhp
->fh_export
->ex_devid_map
) {
104 nfsd4_alloc_devid_map(fhp
);
105 if (!fhp
->fh_export
->ex_devid_map
)
109 id
->fsid_idx
= fhp
->fh_export
->ex_devid_map
->idx
;
110 id
->generation
= device_generation
;
115 void nfsd4_setup_layout_type(struct svc_export
*exp
)
117 if (exp
->ex_flags
& NFSEXP_NOPNFS
)
122 nfsd4_free_layout_stateid(struct nfs4_stid
*stid
)
124 struct nfs4_layout_stateid
*ls
= layoutstateid(stid
);
125 struct nfs4_client
*clp
= ls
->ls_stid
.sc_client
;
126 struct nfs4_file
*fp
= ls
->ls_stid
.sc_file
;
128 spin_lock(&clp
->cl_lock
);
129 list_del_init(&ls
->ls_perclnt
);
130 spin_unlock(&clp
->cl_lock
);
132 spin_lock(&fp
->fi_lock
);
133 list_del_init(&ls
->ls_perfile
);
134 spin_unlock(&fp
->fi_lock
);
136 vfs_setlease(ls
->ls_file
, F_UNLCK
, NULL
, (void **)&ls
);
140 atomic_dec(&ls
->ls_stid
.sc_file
->fi_lo_recalls
);
142 kmem_cache_free(nfs4_layout_stateid_cache
, ls
);
146 nfsd4_layout_setlease(struct nfs4_layout_stateid
*ls
)
148 struct file_lock
*fl
;
151 fl
= locks_alloc_lock();
155 fl
->fl_lmops
= &nfsd4_layouts_lm_ops
;
156 fl
->fl_flags
= FL_LAYOUT
;
157 fl
->fl_type
= F_RDLCK
;
158 fl
->fl_end
= OFFSET_MAX
;
160 fl
->fl_pid
= current
->tgid
;
161 fl
->fl_file
= ls
->ls_file
;
163 status
= vfs_setlease(fl
->fl_file
, fl
->fl_type
, &fl
, NULL
);
172 static struct nfs4_layout_stateid
*
173 nfsd4_alloc_layout_stateid(struct nfsd4_compound_state
*cstate
,
174 struct nfs4_stid
*parent
, u32 layout_type
)
176 struct nfs4_client
*clp
= cstate
->clp
;
177 struct nfs4_file
*fp
= parent
->sc_file
;
178 struct nfs4_layout_stateid
*ls
;
179 struct nfs4_stid
*stp
;
181 stp
= nfs4_alloc_stid(cstate
->clp
, nfs4_layout_stateid_cache
);
184 stp
->sc_free
= nfsd4_free_layout_stateid
;
188 ls
= layoutstateid(stp
);
189 INIT_LIST_HEAD(&ls
->ls_perclnt
);
190 INIT_LIST_HEAD(&ls
->ls_perfile
);
191 spin_lock_init(&ls
->ls_lock
);
192 INIT_LIST_HEAD(&ls
->ls_layouts
);
193 ls
->ls_layout_type
= layout_type
;
194 nfsd4_init_cb(&ls
->ls_recall
, clp
, &nfsd4_cb_layout_ops
,
195 NFSPROC4_CLNT_CB_LAYOUT
);
197 if (parent
->sc_type
== NFS4_DELEG_STID
)
198 ls
->ls_file
= get_file(fp
->fi_deleg_file
);
200 ls
->ls_file
= find_any_file(fp
);
201 BUG_ON(!ls
->ls_file
);
203 if (nfsd4_layout_setlease(ls
)) {
205 kmem_cache_free(nfs4_layout_stateid_cache
, ls
);
209 spin_lock(&clp
->cl_lock
);
210 stp
->sc_type
= NFS4_LAYOUT_STID
;
211 list_add(&ls
->ls_perclnt
, &clp
->cl_lo_states
);
212 spin_unlock(&clp
->cl_lock
);
214 spin_lock(&fp
->fi_lock
);
215 list_add(&ls
->ls_perfile
, &fp
->fi_lo_states
);
216 spin_unlock(&fp
->fi_lock
);
222 nfsd4_preprocess_layout_stateid(struct svc_rqst
*rqstp
,
223 struct nfsd4_compound_state
*cstate
, stateid_t
*stateid
,
224 bool create
, u32 layout_type
, struct nfs4_layout_stateid
**lsp
)
226 struct nfs4_layout_stateid
*ls
;
227 struct nfs4_stid
*stid
;
228 unsigned char typemask
= NFS4_LAYOUT_STID
;
232 typemask
|= (NFS4_OPEN_STID
| NFS4_LOCK_STID
| NFS4_DELEG_STID
);
234 status
= nfsd4_lookup_stateid(cstate
, stateid
, typemask
, &stid
,
235 net_generic(SVC_NET(rqstp
), nfsd_net_id
));
239 if (!fh_match(&cstate
->current_fh
.fh_handle
,
240 &stid
->sc_file
->fi_fhandle
)) {
241 status
= nfserr_bad_stateid
;
245 if (stid
->sc_type
!= NFS4_LAYOUT_STID
) {
246 ls
= nfsd4_alloc_layout_stateid(cstate
, stid
, layout_type
);
249 status
= nfserr_jukebox
;
253 ls
= container_of(stid
, struct nfs4_layout_stateid
, ls_stid
);
255 status
= nfserr_bad_stateid
;
256 if (stateid
->si_generation
> stid
->sc_stateid
.si_generation
)
258 if (layout_type
!= ls
->ls_layout_type
)
272 nfsd4_recall_file_layout(struct nfs4_layout_stateid
*ls
)
274 spin_lock(&ls
->ls_lock
);
278 ls
->ls_recalled
= true;
279 atomic_inc(&ls
->ls_stid
.sc_file
->fi_lo_recalls
);
280 if (list_empty(&ls
->ls_layouts
))
283 atomic_inc(&ls
->ls_stid
.sc_count
);
284 update_stateid(&ls
->ls_stid
.sc_stateid
);
285 memcpy(&ls
->ls_recall_sid
, &ls
->ls_stid
.sc_stateid
, sizeof(stateid_t
));
286 nfsd4_run_cb(&ls
->ls_recall
);
289 spin_unlock(&ls
->ls_lock
);
293 layout_end(struct nfsd4_layout_seg
*seg
)
295 u64 end
= seg
->offset
+ seg
->length
;
296 return end
>= seg
->offset
? end
: NFS4_MAX_UINT64
;
300 layout_update_len(struct nfsd4_layout_seg
*lo
, u64 end
)
302 if (end
== NFS4_MAX_UINT64
)
303 lo
->length
= NFS4_MAX_UINT64
;
305 lo
->length
= end
- lo
->offset
;
309 layouts_overlapping(struct nfs4_layout
*lo
, struct nfsd4_layout_seg
*s
)
311 if (s
->iomode
!= IOMODE_ANY
&& s
->iomode
!= lo
->lo_seg
.iomode
)
313 if (layout_end(&lo
->lo_seg
) <= s
->offset
)
315 if (layout_end(s
) <= lo
->lo_seg
.offset
)
321 layouts_try_merge(struct nfsd4_layout_seg
*lo
, struct nfsd4_layout_seg
*new)
323 if (lo
->iomode
!= new->iomode
)
325 if (layout_end(new) < lo
->offset
)
327 if (layout_end(lo
) < new->offset
)
330 lo
->offset
= min(lo
->offset
, new->offset
);
331 layout_update_len(lo
, max(layout_end(lo
), layout_end(new)));
336 nfsd4_recall_conflict(struct nfs4_layout_stateid
*ls
)
338 struct nfs4_file
*fp
= ls
->ls_stid
.sc_file
;
339 struct nfs4_layout_stateid
*l
, *n
;
340 __be32 nfserr
= nfs_ok
;
342 assert_spin_locked(&fp
->fi_lock
);
344 list_for_each_entry_safe(l
, n
, &fp
->fi_lo_states
, ls_perfile
) {
346 nfsd4_recall_file_layout(l
);
347 nfserr
= nfserr_recallconflict
;
355 nfsd4_insert_layout(struct nfsd4_layoutget
*lgp
, struct nfs4_layout_stateid
*ls
)
357 struct nfsd4_layout_seg
*seg
= &lgp
->lg_seg
;
358 struct nfs4_file
*fp
= ls
->ls_stid
.sc_file
;
359 struct nfs4_layout
*lp
, *new = NULL
;
362 spin_lock(&fp
->fi_lock
);
363 nfserr
= nfsd4_recall_conflict(ls
);
366 spin_lock(&ls
->ls_lock
);
367 list_for_each_entry(lp
, &ls
->ls_layouts
, lo_perstate
) {
368 if (layouts_try_merge(&lp
->lo_seg
, seg
))
371 spin_unlock(&ls
->ls_lock
);
372 spin_unlock(&fp
->fi_lock
);
374 new = kmem_cache_alloc(nfs4_layout_cache
, GFP_KERNEL
);
376 return nfserr_jukebox
;
377 memcpy(&new->lo_seg
, seg
, sizeof(lp
->lo_seg
));
380 spin_lock(&fp
->fi_lock
);
381 nfserr
= nfsd4_recall_conflict(ls
);
384 spin_lock(&ls
->ls_lock
);
385 list_for_each_entry(lp
, &ls
->ls_layouts
, lo_perstate
) {
386 if (layouts_try_merge(&lp
->lo_seg
, seg
))
390 atomic_inc(&ls
->ls_stid
.sc_count
);
391 list_add_tail(&new->lo_perstate
, &ls
->ls_layouts
);
394 update_stateid(&ls
->ls_stid
.sc_stateid
);
395 memcpy(&lgp
->lg_sid
, &ls
->ls_stid
.sc_stateid
, sizeof(stateid_t
));
396 spin_unlock(&ls
->ls_lock
);
398 spin_unlock(&fp
->fi_lock
);
400 kmem_cache_free(nfs4_layout_cache
, new);
405 nfsd4_free_layouts(struct list_head
*reaplist
)
407 while (!list_empty(reaplist
)) {
408 struct nfs4_layout
*lp
= list_first_entry(reaplist
,
409 struct nfs4_layout
, lo_perstate
);
411 list_del(&lp
->lo_perstate
);
412 nfs4_put_stid(&lp
->lo_state
->ls_stid
);
413 kmem_cache_free(nfs4_layout_cache
, lp
);
418 nfsd4_return_file_layout(struct nfs4_layout
*lp
, struct nfsd4_layout_seg
*seg
,
419 struct list_head
*reaplist
)
421 struct nfsd4_layout_seg
*lo
= &lp
->lo_seg
;
422 u64 end
= layout_end(lo
);
424 if (seg
->offset
<= lo
->offset
) {
425 if (layout_end(seg
) >= end
) {
426 list_move_tail(&lp
->lo_perstate
, reaplist
);
431 /* retain the whole layout segment on a split. */
432 if (layout_end(seg
) < end
) {
433 dprintk("%s: split not supported\n", __func__
);
437 lo
->offset
= layout_end(seg
);
440 layout_update_len(lo
, end
);
444 nfsd4_return_file_layouts(struct svc_rqst
*rqstp
,
445 struct nfsd4_compound_state
*cstate
,
446 struct nfsd4_layoutreturn
*lrp
)
448 struct nfs4_layout_stateid
*ls
;
449 struct nfs4_layout
*lp
, *n
;
454 nfserr
= nfsd4_preprocess_layout_stateid(rqstp
, cstate
, &lrp
->lr_sid
,
455 false, lrp
->lr_layout_type
,
460 spin_lock(&ls
->ls_lock
);
461 list_for_each_entry_safe(lp
, n
, &ls
->ls_layouts
, lo_perstate
) {
462 if (layouts_overlapping(lp
, &lrp
->lr_seg
)) {
463 nfsd4_return_file_layout(lp
, &lrp
->lr_seg
, &reaplist
);
467 if (!list_empty(&ls
->ls_layouts
)) {
469 update_stateid(&ls
->ls_stid
.sc_stateid
);
470 memcpy(&lrp
->lr_sid
, &ls
->ls_stid
.sc_stateid
,
473 lrp
->lrs_present
= 1;
475 nfs4_unhash_stid(&ls
->ls_stid
);
476 lrp
->lrs_present
= 0;
478 spin_unlock(&ls
->ls_lock
);
480 nfs4_put_stid(&ls
->ls_stid
);
481 nfsd4_free_layouts(&reaplist
);
486 nfsd4_return_client_layouts(struct svc_rqst
*rqstp
,
487 struct nfsd4_compound_state
*cstate
,
488 struct nfsd4_layoutreturn
*lrp
)
490 struct nfs4_layout_stateid
*ls
, *n
;
491 struct nfs4_client
*clp
= cstate
->clp
;
492 struct nfs4_layout
*lp
, *t
;
495 lrp
->lrs_present
= 0;
497 spin_lock(&clp
->cl_lock
);
498 list_for_each_entry_safe(ls
, n
, &clp
->cl_lo_states
, ls_perclnt
) {
499 if (lrp
->lr_return_type
== RETURN_FSID
&&
500 !fh_fsid_match(&ls
->ls_stid
.sc_file
->fi_fhandle
,
501 &cstate
->current_fh
.fh_handle
))
504 spin_lock(&ls
->ls_lock
);
505 list_for_each_entry_safe(lp
, t
, &ls
->ls_layouts
, lo_perstate
) {
506 if (lrp
->lr_seg
.iomode
== IOMODE_ANY
||
507 lrp
->lr_seg
.iomode
== lp
->lo_seg
.iomode
)
508 list_move_tail(&lp
->lo_perstate
, &reaplist
);
510 spin_unlock(&ls
->ls_lock
);
512 spin_unlock(&clp
->cl_lock
);
514 nfsd4_free_layouts(&reaplist
);
519 nfsd4_return_all_layouts(struct nfs4_layout_stateid
*ls
,
520 struct list_head
*reaplist
)
522 spin_lock(&ls
->ls_lock
);
523 list_splice_init(&ls
->ls_layouts
, reaplist
);
524 spin_unlock(&ls
->ls_lock
);
528 nfsd4_return_all_client_layouts(struct nfs4_client
*clp
)
530 struct nfs4_layout_stateid
*ls
, *n
;
533 spin_lock(&clp
->cl_lock
);
534 list_for_each_entry_safe(ls
, n
, &clp
->cl_lo_states
, ls_perclnt
)
535 nfsd4_return_all_layouts(ls
, &reaplist
);
536 spin_unlock(&clp
->cl_lock
);
538 nfsd4_free_layouts(&reaplist
);
542 nfsd4_return_all_file_layouts(struct nfs4_client
*clp
, struct nfs4_file
*fp
)
544 struct nfs4_layout_stateid
*ls
, *n
;
547 spin_lock(&fp
->fi_lock
);
548 list_for_each_entry_safe(ls
, n
, &fp
->fi_lo_states
, ls_perfile
) {
549 if (ls
->ls_stid
.sc_client
== clp
)
550 nfsd4_return_all_layouts(ls
, &reaplist
);
552 spin_unlock(&fp
->fi_lock
);
554 nfsd4_free_layouts(&reaplist
);
558 nfsd4_cb_layout_fail(struct nfs4_layout_stateid
*ls
)
560 struct nfs4_client
*clp
= ls
->ls_stid
.sc_client
;
561 char addr_str
[INET6_ADDRSTRLEN
];
562 static char *envp
[] = {
565 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
571 rpc_ntop((struct sockaddr
*)&clp
->cl_addr
, addr_str
, sizeof(addr_str
));
574 "nfsd: client %s failed to respond to layout recall. "
575 " Fencing..\n", addr_str
);
577 argv
[0] = "/sbin/nfsd-recall-failed";
579 argv
[2] = ls
->ls_file
->f_path
.mnt
->mnt_sb
->s_id
;
582 error
= call_usermodehelper(argv
[0], argv
, envp
, UMH_WAIT_PROC
);
584 printk(KERN_ERR
"nfsd: fence failed for client %s: %d!\n",
590 nfsd4_cb_layout_done(struct nfsd4_callback
*cb
, struct rpc_task
*task
)
592 struct nfs4_layout_stateid
*ls
=
593 container_of(cb
, struct nfs4_layout_stateid
, ls_recall
);
596 switch (task
->tk_status
) {
599 case -NFS4ERR_NOMATCHING_LAYOUT
:
603 /* Poll the client until it's done with the layout */
604 /* FIXME: cap number of retries.
605 * The pnfs standard states that we need to only expire
606 * the client after at-least "lease time" .eg lease-time * 2
607 * when failing to communicate a recall
609 rpc_delay(task
, HZ
/100); /* 10 mili-seconds */
613 * Unknown error or non-responding client, we'll need to fence.
615 nfsd4_cb_layout_fail(ls
);
621 nfsd4_cb_layout_release(struct nfsd4_callback
*cb
)
623 struct nfs4_layout_stateid
*ls
=
624 container_of(cb
, struct nfs4_layout_stateid
, ls_recall
);
627 nfsd4_return_all_layouts(ls
, &reaplist
);
628 nfsd4_free_layouts(&reaplist
);
629 nfs4_put_stid(&ls
->ls_stid
);
632 static struct nfsd4_callback_ops nfsd4_cb_layout_ops
= {
633 .done
= nfsd4_cb_layout_done
,
634 .release
= nfsd4_cb_layout_release
,
638 nfsd4_layout_lm_break(struct file_lock
*fl
)
641 * We don't want the locks code to timeout the lease for us;
642 * we'll remove it ourself if a layout isn't returned
645 fl
->fl_break_time
= 0;
646 nfsd4_recall_file_layout(fl
->fl_owner
);
651 nfsd4_layout_lm_change(struct file_lock
*onlist
, int arg
,
652 struct list_head
*dispose
)
654 BUG_ON(!(arg
& F_UNLCK
));
655 return lease_modify(onlist
, arg
, dispose
);
658 static const struct lock_manager_operations nfsd4_layouts_lm_ops
= {
659 .lm_break
= nfsd4_layout_lm_break
,
660 .lm_change
= nfsd4_layout_lm_change
,
664 nfsd4_init_pnfs(void)
668 for (i
= 0; i
< DEVID_HASH_SIZE
; i
++)
669 INIT_LIST_HEAD(&nfsd_devid_hash
[i
]);
671 nfs4_layout_cache
= kmem_cache_create("nfs4_layout",
672 sizeof(struct nfs4_layout
), 0, 0, NULL
);
673 if (!nfs4_layout_cache
)
676 nfs4_layout_stateid_cache
= kmem_cache_create("nfs4_layout_stateid",
677 sizeof(struct nfs4_layout_stateid
), 0, 0, NULL
);
678 if (!nfs4_layout_stateid_cache
) {
679 kmem_cache_destroy(nfs4_layout_cache
);
686 nfsd4_exit_pnfs(void)
690 kmem_cache_destroy(nfs4_layout_cache
);
691 kmem_cache_destroy(nfs4_layout_stateid_cache
);
693 for (i
= 0; i
< DEVID_HASH_SIZE
; i
++) {
694 struct nfsd4_deviceid_map
*map
, *n
;
696 list_for_each_entry_safe(map
, n
, &nfsd_devid_hash
[i
], hash
)