2 * pNFS functions to call and manage layout drivers.
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
8 * Dean Hildebrand <dhildebz@umich.edu>
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
30 #include <linux/nfs_fs.h>
34 #define NFSDBG_FACILITY NFSDBG_PNFS
39 * protects pnfs_modules_tbl.
41 static DEFINE_SPINLOCK(pnfs_spinlock
);
44 * pnfs_modules_tbl holds all pnfs modules
46 static LIST_HEAD(pnfs_modules_tbl
);
48 /* Return the registered pnfs layout driver module matching given id */
49 static struct pnfs_layoutdriver_type
*
50 find_pnfs_driver_locked(u32 id
)
52 struct pnfs_layoutdriver_type
*local
;
54 list_for_each_entry(local
, &pnfs_modules_tbl
, pnfs_tblid
)
59 dprintk("%s: Searching for id %u, found %p\n", __func__
, id
, local
);
63 static struct pnfs_layoutdriver_type
*
64 find_pnfs_driver(u32 id
)
66 struct pnfs_layoutdriver_type
*local
;
68 spin_lock(&pnfs_spinlock
);
69 local
= find_pnfs_driver_locked(id
);
70 spin_unlock(&pnfs_spinlock
);
75 unset_pnfs_layoutdriver(struct nfs_server
*nfss
)
77 if (nfss
->pnfs_curr_ld
) {
78 nfss
->pnfs_curr_ld
->clear_layoutdriver(nfss
);
79 module_put(nfss
->pnfs_curr_ld
->owner
);
81 nfss
->pnfs_curr_ld
= NULL
;
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
91 set_pnfs_layoutdriver(struct nfs_server
*server
, u32 id
)
93 struct pnfs_layoutdriver_type
*ld_type
= NULL
;
97 if (!(server
->nfs_client
->cl_exchange_flags
&
98 (EXCHGID4_FLAG_USE_NON_PNFS
| EXCHGID4_FLAG_USE_PNFS_MDS
))) {
99 printk(KERN_ERR
"%s: id %u cl_exchange_flags 0x%x\n", __func__
,
100 id
, server
->nfs_client
->cl_exchange_flags
);
103 ld_type
= find_pnfs_driver(id
);
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX
, id
);
106 ld_type
= find_pnfs_driver(id
);
108 dprintk("%s: No pNFS module found for %u.\n",
113 if (!try_module_get(ld_type
->owner
)) {
114 dprintk("%s: Could not grab reference on module\n", __func__
);
117 server
->pnfs_curr_ld
= ld_type
;
118 if (ld_type
->set_layoutdriver(server
)) {
120 "%s: Error initializing mount point for layout driver %u.\n",
122 module_put(ld_type
->owner
);
125 dprintk("%s: pNFS module for %u set\n", __func__
, id
);
129 dprintk("%s: Using NFSv4 I/O\n", __func__
);
130 server
->pnfs_curr_ld
= NULL
;
134 pnfs_register_layoutdriver(struct pnfs_layoutdriver_type
*ld_type
)
136 int status
= -EINVAL
;
137 struct pnfs_layoutdriver_type
*tmp
;
139 if (ld_type
->id
== 0) {
140 printk(KERN_ERR
"%s id 0 is reserved\n", __func__
);
143 if (!ld_type
->alloc_lseg
|| !ld_type
->free_lseg
) {
144 printk(KERN_ERR
"%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__
);
149 spin_lock(&pnfs_spinlock
);
150 tmp
= find_pnfs_driver_locked(ld_type
->id
);
152 list_add(&ld_type
->pnfs_tblid
, &pnfs_modules_tbl
);
154 dprintk("%s Registering id:%u name:%s\n", __func__
, ld_type
->id
,
157 printk(KERN_ERR
"%s Module with id %d already loaded!\n",
158 __func__
, ld_type
->id
);
160 spin_unlock(&pnfs_spinlock
);
164 EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver
);
167 pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type
*ld_type
)
169 dprintk("%s Deregistering id:%u\n", __func__
, ld_type
->id
);
170 spin_lock(&pnfs_spinlock
);
171 list_del(&ld_type
->pnfs_tblid
);
172 spin_unlock(&pnfs_spinlock
);
174 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver
);
177 * pNFS client layout cache
180 /* Need to hold i_lock if caller does not already hold reference */
182 get_layout_hdr(struct pnfs_layout_hdr
*lo
)
184 atomic_inc(&lo
->plh_refcount
);
188 destroy_layout_hdr(struct pnfs_layout_hdr
*lo
)
190 dprintk("%s: freeing layout cache %p\n", __func__
, lo
);
191 BUG_ON(!list_empty(&lo
->plh_layouts
));
192 NFS_I(lo
->plh_inode
)->layout
= NULL
;
197 put_layout_hdr_locked(struct pnfs_layout_hdr
*lo
)
199 if (atomic_dec_and_test(&lo
->plh_refcount
))
200 destroy_layout_hdr(lo
);
204 put_layout_hdr(struct pnfs_layout_hdr
*lo
)
206 struct inode
*inode
= lo
->plh_inode
;
208 if (atomic_dec_and_lock(&lo
->plh_refcount
, &inode
->i_lock
)) {
209 destroy_layout_hdr(lo
);
210 spin_unlock(&inode
->i_lock
);
215 init_lseg(struct pnfs_layout_hdr
*lo
, struct pnfs_layout_segment
*lseg
)
217 INIT_LIST_HEAD(&lseg
->pls_list
);
218 atomic_set(&lseg
->pls_refcount
, 1);
220 set_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
);
221 lseg
->pls_layout
= lo
;
224 static void free_lseg(struct pnfs_layout_segment
*lseg
)
226 struct inode
*ino
= lseg
->pls_layout
->plh_inode
;
228 NFS_SERVER(ino
)->pnfs_curr_ld
->free_lseg(lseg
);
229 /* Matched by get_layout_hdr in pnfs_insert_layout */
230 put_layout_hdr(NFS_I(ino
)->layout
);
233 /* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
234 * could sleep, so must be called outside of the lock.
235 * Returns 1 if object was removed, otherwise return 0.
238 put_lseg_locked(struct pnfs_layout_segment
*lseg
,
239 struct list_head
*tmp_list
)
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__
, lseg
,
242 atomic_read(&lseg
->pls_refcount
),
243 test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
));
244 if (atomic_dec_and_test(&lseg
->pls_refcount
)) {
245 struct inode
*ino
= lseg
->pls_layout
->plh_inode
;
247 BUG_ON(test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
));
248 list_del(&lseg
->pls_list
);
249 if (list_empty(&lseg
->pls_layout
->plh_segs
)) {
250 struct nfs_client
*clp
;
252 clp
= NFS_SERVER(ino
)->nfs_client
;
253 spin_lock(&clp
->cl_lock
);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg
->pls_layout
->plh_layouts
);
256 spin_unlock(&clp
->cl_lock
);
257 clear_bit(NFS_LAYOUT_BULK_RECALL
, &lseg
->pls_layout
->plh_flags
);
259 rpc_wake_up(&NFS_SERVER(ino
)->roc_rpcwaitq
);
260 list_add(&lseg
->pls_list
, tmp_list
);
267 should_free_lseg(u32 lseg_iomode
, u32 recall_iomode
)
269 return (recall_iomode
== IOMODE_ANY
||
270 lseg_iomode
== recall_iomode
);
273 /* Returns 1 if lseg is removed from list, 0 otherwise */
274 static int mark_lseg_invalid(struct pnfs_layout_segment
*lseg
,
275 struct list_head
*tmp_list
)
279 if (test_and_clear_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
)) {
280 /* Remove the reference keeping the lseg in the
281 * list. It will now be removed when all
282 * outstanding io is finished.
284 rv
= put_lseg_locked(lseg
, tmp_list
);
289 /* Returns count of number of matching invalid lsegs remaining in list
293 mark_matching_lsegs_invalid(struct pnfs_layout_hdr
*lo
,
294 struct list_head
*tmp_list
,
297 struct pnfs_layout_segment
*lseg
, *next
;
298 int invalid
= 0, removed
= 0;
300 dprintk("%s:Begin lo %p\n", __func__
, lo
);
302 list_for_each_entry_safe(lseg
, next
, &lo
->plh_segs
, pls_list
)
303 if (should_free_lseg(lseg
->pls_range
.iomode
, iomode
)) {
304 dprintk("%s: freeing lseg %p iomode %d "
305 "offset %llu length %llu\n", __func__
,
306 lseg
, lseg
->pls_range
.iomode
, lseg
->pls_range
.offset
,
307 lseg
->pls_range
.length
);
309 removed
+= mark_lseg_invalid(lseg
, tmp_list
);
311 dprintk("%s:Return %i\n", __func__
, invalid
- removed
);
312 return invalid
- removed
;
316 pnfs_free_lseg_list(struct list_head
*free_me
)
318 struct pnfs_layout_segment
*lseg
, *tmp
;
320 list_for_each_entry_safe(lseg
, tmp
, free_me
, pls_list
) {
321 list_del(&lseg
->pls_list
);
327 pnfs_destroy_layout(struct nfs_inode
*nfsi
)
329 struct pnfs_layout_hdr
*lo
;
332 spin_lock(&nfsi
->vfs_inode
.i_lock
);
335 set_bit(NFS_LAYOUT_DESTROYED
, &nfsi
->layout
->plh_flags
);
336 mark_matching_lsegs_invalid(lo
, &tmp_list
, IOMODE_ANY
);
337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
338 put_layout_hdr_locked(lo
);
340 spin_unlock(&nfsi
->vfs_inode
.i_lock
);
341 pnfs_free_lseg_list(&tmp_list
);
345 * Called by the state manager to remove all layouts established under an
349 pnfs_destroy_all_layouts(struct nfs_client
*clp
)
351 struct pnfs_layout_hdr
*lo
;
354 spin_lock(&clp
->cl_lock
);
355 list_splice_init(&clp
->cl_layouts
, &tmp_list
);
356 spin_unlock(&clp
->cl_lock
);
358 while (!list_empty(&tmp_list
)) {
359 lo
= list_entry(tmp_list
.next
, struct pnfs_layout_hdr
,
361 dprintk("%s freeing layout for inode %lu\n", __func__
,
362 lo
->plh_inode
->i_ino
);
363 pnfs_destroy_layout(NFS_I(lo
->plh_inode
));
367 /* update lo->plh_stateid with new if is more recent */
369 pnfs_set_layout_stateid(struct pnfs_layout_hdr
*lo
, const nfs4_stateid
*new,
374 oldseq
= be32_to_cpu(lo
->plh_stateid
.stateid
.seqid
);
375 newseq
= be32_to_cpu(new->stateid
.seqid
);
376 if ((int)(newseq
- oldseq
) > 0) {
377 memcpy(&lo
->plh_stateid
, &new->stateid
, sizeof(new->stateid
));
378 if (update_barrier
) {
379 u32 new_barrier
= be32_to_cpu(new->stateid
.seqid
);
381 if ((int)(new_barrier
- lo
->plh_barrier
))
382 lo
->plh_barrier
= new_barrier
;
384 /* Because of wraparound, we want to keep the barrier
385 * "close" to the current seqids. It needs to be
386 * within 2**31 to count as "behind", so if it
387 * gets too near that limit, give us a litle leeway
388 * and bring it to within 2**30.
389 * NOTE - and yes, this is all unsigned arithmetic.
391 if (unlikely((newseq
- lo
->plh_barrier
) > (3 << 29)))
392 lo
->plh_barrier
= newseq
- (1 << 30);
397 /* lget is set to 1 if called from inside send_layoutget call chain */
399 pnfs_layoutgets_blocked(struct pnfs_layout_hdr
*lo
, nfs4_stateid
*stateid
,
403 (int)(lo
->plh_barrier
- be32_to_cpu(stateid
->stateid
.seqid
)) >= 0)
405 return lo
->plh_block_lgets
||
406 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
) ||
407 (list_empty(&lo
->plh_segs
) &&
408 (atomic_read(&lo
->plh_outstanding
) > lget
));
412 pnfs_choose_layoutget_stateid(nfs4_stateid
*dst
, struct pnfs_layout_hdr
*lo
,
413 struct nfs4_state
*open_state
)
417 dprintk("--> %s\n", __func__
);
418 spin_lock(&lo
->plh_inode
->i_lock
);
419 if (pnfs_layoutgets_blocked(lo
, NULL
, 1)) {
421 } else if (list_empty(&lo
->plh_segs
)) {
425 seq
= read_seqbegin(&open_state
->seqlock
);
426 memcpy(dst
->data
, open_state
->stateid
.data
,
427 sizeof(open_state
->stateid
.data
));
428 } while (read_seqretry(&open_state
->seqlock
, seq
));
430 memcpy(dst
->data
, lo
->plh_stateid
.data
, sizeof(lo
->plh_stateid
.data
));
431 spin_unlock(&lo
->plh_inode
->i_lock
);
432 dprintk("<-- %s\n", __func__
);
437 * Get layout from server.
438 * for now, assume that whole file layouts are requested.
440 * arg->length: all ones
442 static struct pnfs_layout_segment
*
443 send_layoutget(struct pnfs_layout_hdr
*lo
,
444 struct nfs_open_context
*ctx
,
447 struct inode
*ino
= lo
->plh_inode
;
448 struct nfs_server
*server
= NFS_SERVER(ino
);
449 struct nfs4_layoutget
*lgp
;
450 struct pnfs_layout_segment
*lseg
= NULL
;
452 dprintk("--> %s\n", __func__
);
455 lgp
= kzalloc(sizeof(*lgp
), GFP_KERNEL
);
458 lgp
->args
.minlength
= NFS4_MAX_UINT64
;
459 lgp
->args
.maxcount
= PNFS_LAYOUT_MAXSIZE
;
460 lgp
->args
.range
.iomode
= iomode
;
461 lgp
->args
.range
.offset
= 0;
462 lgp
->args
.range
.length
= NFS4_MAX_UINT64
;
463 lgp
->args
.type
= server
->pnfs_curr_ld
->id
;
464 lgp
->args
.inode
= ino
;
465 lgp
->args
.ctx
= get_nfs_open_context(ctx
);
468 /* Synchronously retrieve layout information from server and
471 nfs4_proc_layoutget(lgp
);
473 /* remember that LAYOUTGET failed and suspend trying */
474 set_bit(lo_fail_bit(iomode
), &lo
->plh_flags
);
479 bool pnfs_roc(struct inode
*ino
)
481 struct pnfs_layout_hdr
*lo
;
482 struct pnfs_layout_segment
*lseg
, *tmp
;
486 spin_lock(&ino
->i_lock
);
487 lo
= NFS_I(ino
)->layout
;
488 if (!lo
|| !test_and_clear_bit(NFS_LAYOUT_ROC
, &lo
->plh_flags
) ||
489 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
))
491 list_for_each_entry_safe(lseg
, tmp
, &lo
->plh_segs
, pls_list
)
492 if (test_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
)) {
493 mark_lseg_invalid(lseg
, &tmp_list
);
498 lo
->plh_block_lgets
++;
499 get_layout_hdr(lo
); /* matched in pnfs_roc_release */
500 spin_unlock(&ino
->i_lock
);
501 pnfs_free_lseg_list(&tmp_list
);
505 spin_unlock(&ino
->i_lock
);
509 void pnfs_roc_release(struct inode
*ino
)
511 struct pnfs_layout_hdr
*lo
;
513 spin_lock(&ino
->i_lock
);
514 lo
= NFS_I(ino
)->layout
;
515 lo
->plh_block_lgets
--;
516 put_layout_hdr_locked(lo
);
517 spin_unlock(&ino
->i_lock
);
520 void pnfs_roc_set_barrier(struct inode
*ino
, u32 barrier
)
522 struct pnfs_layout_hdr
*lo
;
524 spin_lock(&ino
->i_lock
);
525 lo
= NFS_I(ino
)->layout
;
526 if ((int)(barrier
- lo
->plh_barrier
) > 0)
527 lo
->plh_barrier
= barrier
;
528 spin_unlock(&ino
->i_lock
);
531 bool pnfs_roc_drain(struct inode
*ino
, u32
*barrier
)
533 struct nfs_inode
*nfsi
= NFS_I(ino
);
534 struct pnfs_layout_segment
*lseg
;
537 spin_lock(&ino
->i_lock
);
538 list_for_each_entry(lseg
, &nfsi
->layout
->plh_segs
, pls_list
)
539 if (test_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
)) {
544 struct pnfs_layout_hdr
*lo
= nfsi
->layout
;
545 u32 current_seqid
= be32_to_cpu(lo
->plh_stateid
.stateid
.seqid
);
547 /* Since close does not return a layout stateid for use as
548 * a barrier, we choose the worst-case barrier.
550 *barrier
= current_seqid
+ atomic_read(&lo
->plh_outstanding
);
552 spin_unlock(&ino
->i_lock
);
557 * Compare two layout segments for sorting into layout cache.
558 * We want to preferentially return RW over RO layouts, so ensure those
562 cmp_layout(u32 iomode1
, u32 iomode2
)
564 /* read > read/write */
565 return (int)(iomode2
== IOMODE_READ
) - (int)(iomode1
== IOMODE_READ
);
569 pnfs_insert_layout(struct pnfs_layout_hdr
*lo
,
570 struct pnfs_layout_segment
*lseg
)
572 struct pnfs_layout_segment
*lp
;
575 dprintk("%s:Begin\n", __func__
);
577 assert_spin_locked(&lo
->plh_inode
->i_lock
);
578 list_for_each_entry(lp
, &lo
->plh_segs
, pls_list
) {
579 if (cmp_layout(lp
->pls_range
.iomode
, lseg
->pls_range
.iomode
) > 0)
581 list_add_tail(&lseg
->pls_list
, &lp
->pls_list
);
582 dprintk("%s: inserted lseg %p "
583 "iomode %d offset %llu length %llu before "
584 "lp %p iomode %d offset %llu length %llu\n",
585 __func__
, lseg
, lseg
->pls_range
.iomode
,
586 lseg
->pls_range
.offset
, lseg
->pls_range
.length
,
587 lp
, lp
->pls_range
.iomode
, lp
->pls_range
.offset
,
588 lp
->pls_range
.length
);
593 list_add_tail(&lseg
->pls_list
, &lo
->plh_segs
);
594 dprintk("%s: inserted lseg %p "
595 "iomode %d offset %llu length %llu at tail\n",
596 __func__
, lseg
, lseg
->pls_range
.iomode
,
597 lseg
->pls_range
.offset
, lseg
->pls_range
.length
);
601 dprintk("%s:Return\n", __func__
);
604 static struct pnfs_layout_hdr
*
605 alloc_init_layout_hdr(struct inode
*ino
)
607 struct pnfs_layout_hdr
*lo
;
609 lo
= kzalloc(sizeof(struct pnfs_layout_hdr
), GFP_KERNEL
);
612 atomic_set(&lo
->plh_refcount
, 1);
613 INIT_LIST_HEAD(&lo
->plh_layouts
);
614 INIT_LIST_HEAD(&lo
->plh_segs
);
615 INIT_LIST_HEAD(&lo
->plh_bulk_recall
);
620 static struct pnfs_layout_hdr
*
621 pnfs_find_alloc_layout(struct inode
*ino
)
623 struct nfs_inode
*nfsi
= NFS_I(ino
);
624 struct pnfs_layout_hdr
*new = NULL
;
626 dprintk("%s Begin ino=%p layout=%p\n", __func__
, ino
, nfsi
->layout
);
628 assert_spin_locked(&ino
->i_lock
);
630 if (test_bit(NFS_LAYOUT_DESTROYED
, &nfsi
->layout
->plh_flags
))
635 spin_unlock(&ino
->i_lock
);
636 new = alloc_init_layout_hdr(ino
);
637 spin_lock(&ino
->i_lock
);
639 if (likely(nfsi
->layout
== NULL
)) /* Won the race? */
647 * iomode matching rules:
658 is_matching_lseg(struct pnfs_layout_segment
*lseg
, u32 iomode
)
660 return (iomode
!= IOMODE_RW
|| lseg
->pls_range
.iomode
== IOMODE_RW
);
664 * lookup range in layout
666 static struct pnfs_layout_segment
*
667 pnfs_find_lseg(struct pnfs_layout_hdr
*lo
, u32 iomode
)
669 struct pnfs_layout_segment
*lseg
, *ret
= NULL
;
671 dprintk("%s:Begin\n", __func__
);
673 assert_spin_locked(&lo
->plh_inode
->i_lock
);
674 list_for_each_entry(lseg
, &lo
->plh_segs
, pls_list
) {
675 if (test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
) &&
676 is_matching_lseg(lseg
, iomode
)) {
680 if (cmp_layout(iomode
, lseg
->pls_range
.iomode
) > 0)
684 dprintk("%s:Return lseg %p ref %d\n",
685 __func__
, ret
, ret
? atomic_read(&ret
->pls_refcount
) : 0);
690 * Layout segment is retrieved from the server if not cached.
691 * The appropriate layout segment is referenced and returned to the caller.
693 struct pnfs_layout_segment
*
694 pnfs_update_layout(struct inode
*ino
,
695 struct nfs_open_context
*ctx
,
696 enum pnfs_iomode iomode
)
698 struct nfs_inode
*nfsi
= NFS_I(ino
);
699 struct nfs_client
*clp
= NFS_SERVER(ino
)->nfs_client
;
700 struct pnfs_layout_hdr
*lo
;
701 struct pnfs_layout_segment
*lseg
= NULL
;
703 if (!pnfs_enabled_sb(NFS_SERVER(ino
)))
705 spin_lock(&ino
->i_lock
);
706 lo
= pnfs_find_alloc_layout(ino
);
708 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__
);
712 /* Do we even need to bother with this? */
713 if (test_bit(NFS4CLNT_LAYOUTRECALL
, &clp
->cl_state
) ||
714 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
)) {
715 dprintk("%s matches recall, use MDS\n", __func__
);
718 /* Check to see if the layout for the given range already exists */
719 lseg
= pnfs_find_lseg(lo
, iomode
);
723 /* if LAYOUTGET already failed once we don't try again */
724 if (test_bit(lo_fail_bit(iomode
), &nfsi
->layout
->plh_flags
))
727 if (pnfs_layoutgets_blocked(lo
, NULL
, 0))
729 atomic_inc(&lo
->plh_outstanding
);
732 if (list_empty(&lo
->plh_segs
)) {
733 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in.
736 spin_lock(&clp
->cl_lock
);
737 BUG_ON(!list_empty(&lo
->plh_layouts
));
738 list_add_tail(&lo
->plh_layouts
, &clp
->cl_layouts
);
739 spin_unlock(&clp
->cl_lock
);
741 spin_unlock(&ino
->i_lock
);
743 lseg
= send_layoutget(lo
, ctx
, iomode
);
745 spin_lock(&ino
->i_lock
);
746 if (list_empty(&lo
->plh_segs
)) {
747 spin_lock(&clp
->cl_lock
);
748 list_del_init(&lo
->plh_layouts
);
749 spin_unlock(&clp
->cl_lock
);
750 clear_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
);
752 spin_unlock(&ino
->i_lock
);
754 atomic_dec(&lo
->plh_outstanding
);
757 dprintk("%s end, state 0x%lx lseg %p\n", __func__
,
758 nfsi
->layout
->plh_flags
, lseg
);
761 spin_unlock(&ino
->i_lock
);
766 pnfs_layout_process(struct nfs4_layoutget
*lgp
)
768 struct pnfs_layout_hdr
*lo
= NFS_I(lgp
->args
.inode
)->layout
;
769 struct nfs4_layoutget_res
*res
= &lgp
->res
;
770 struct pnfs_layout_segment
*lseg
;
771 struct inode
*ino
= lo
->plh_inode
;
772 struct nfs_client
*clp
= NFS_SERVER(ino
)->nfs_client
;
775 /* Verify we got what we asked for.
776 * Note that because the xdr parsing only accepts a single
777 * element array, this can fail even if the server is behaving
780 if (lgp
->args
.range
.iomode
> res
->range
.iomode
||
781 res
->range
.offset
!= 0 ||
782 res
->range
.length
!= NFS4_MAX_UINT64
) {
786 /* Inject layout blob into I/O device driver */
787 lseg
= NFS_SERVER(ino
)->pnfs_curr_ld
->alloc_lseg(lo
, res
);
788 if (!lseg
|| IS_ERR(lseg
)) {
792 status
= PTR_ERR(lseg
);
793 dprintk("%s: Could not allocate layout: error %d\n",
798 spin_lock(&ino
->i_lock
);
799 if (test_bit(NFS4CLNT_LAYOUTRECALL
, &clp
->cl_state
) ||
800 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
)) {
801 dprintk("%s forget reply due to recall\n", __func__
);
802 goto out_forget_reply
;
805 if (pnfs_layoutgets_blocked(lo
, &res
->stateid
, 1)) {
806 dprintk("%s forget reply due to state\n", __func__
);
807 goto out_forget_reply
;
810 lseg
->pls_range
= res
->range
;
812 pnfs_insert_layout(lo
, lseg
);
814 if (res
->return_on_close
) {
815 set_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
);
816 set_bit(NFS_LAYOUT_ROC
, &lo
->plh_flags
);
819 /* Done processing layoutget. Set the layout stateid */
820 pnfs_set_layout_stateid(lo
, &res
->stateid
, false);
821 spin_unlock(&ino
->i_lock
);
826 spin_unlock(&ino
->i_lock
);
827 lseg
->pls_layout
= lo
;
828 NFS_SERVER(ino
)->pnfs_curr_ld
->free_lseg(lseg
);
833 * Device ID cache. Currently supports one layout type per struct nfs_client.
834 * Add layout type to the lookup key to expand to support multiple types.
837 pnfs_alloc_init_deviceid_cache(struct nfs_client
*clp
,
838 void (*free_callback
)(struct pnfs_deviceid_node
*))
840 struct pnfs_deviceid_cache
*c
;
842 c
= kzalloc(sizeof(struct pnfs_deviceid_cache
), GFP_KERNEL
);
845 spin_lock(&clp
->cl_lock
);
846 if (clp
->cl_devid_cache
!= NULL
) {
847 atomic_inc(&clp
->cl_devid_cache
->dc_ref
);
848 dprintk("%s [kref [%d]]\n", __func__
,
849 atomic_read(&clp
->cl_devid_cache
->dc_ref
));
852 /* kzalloc initializes hlists */
853 spin_lock_init(&c
->dc_lock
);
854 atomic_set(&c
->dc_ref
, 1);
855 c
->dc_free_callback
= free_callback
;
856 clp
->cl_devid_cache
= c
;
857 dprintk("%s [new]\n", __func__
);
859 spin_unlock(&clp
->cl_lock
);
862 EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache
);
865 * Called from pnfs_layoutdriver_type->free_lseg
866 * last layout segment reference frees deviceid
869 pnfs_put_deviceid(struct pnfs_deviceid_cache
*c
,
870 struct pnfs_deviceid_node
*devid
)
872 struct nfs4_deviceid
*id
= &devid
->de_id
;
873 struct pnfs_deviceid_node
*d
;
874 struct hlist_node
*n
;
875 long h
= nfs4_deviceid_hash(id
);
877 dprintk("%s [%d]\n", __func__
, atomic_read(&devid
->de_ref
));
878 if (!atomic_dec_and_lock(&devid
->de_ref
, &c
->dc_lock
))
881 hlist_for_each_entry_rcu(d
, n
, &c
->dc_deviceids
[h
], de_node
)
882 if (!memcmp(&d
->de_id
, id
, sizeof(*id
))) {
883 hlist_del_rcu(&d
->de_node
);
884 spin_unlock(&c
->dc_lock
);
886 c
->dc_free_callback(devid
);
889 spin_unlock(&c
->dc_lock
);
890 /* Why wasn't it found in the list? */
893 EXPORT_SYMBOL_GPL(pnfs_put_deviceid
);
895 /* Find and reference a deviceid */
896 struct pnfs_deviceid_node
*
897 pnfs_find_get_deviceid(struct pnfs_deviceid_cache
*c
, struct nfs4_deviceid
*id
)
899 struct pnfs_deviceid_node
*d
;
900 struct hlist_node
*n
;
901 long hash
= nfs4_deviceid_hash(id
);
903 dprintk("--> %s hash %ld\n", __func__
, hash
);
905 hlist_for_each_entry_rcu(d
, n
, &c
->dc_deviceids
[hash
], de_node
) {
906 if (!memcmp(&d
->de_id
, id
, sizeof(*id
))) {
907 if (!atomic_inc_not_zero(&d
->de_ref
)) {
919 EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid
);
922 * Add a deviceid to the cache.
923 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
925 struct pnfs_deviceid_node
*
926 pnfs_add_deviceid(struct pnfs_deviceid_cache
*c
, struct pnfs_deviceid_node
*new)
928 struct pnfs_deviceid_node
*d
;
929 long hash
= nfs4_deviceid_hash(&new->de_id
);
931 dprintk("--> %s hash %ld\n", __func__
, hash
);
932 spin_lock(&c
->dc_lock
);
933 d
= pnfs_find_get_deviceid(c
, &new->de_id
);
935 spin_unlock(&c
->dc_lock
);
936 dprintk("%s [discard]\n", __func__
);
937 c
->dc_free_callback(new);
940 INIT_HLIST_NODE(&new->de_node
);
941 atomic_set(&new->de_ref
, 1);
942 hlist_add_head_rcu(&new->de_node
, &c
->dc_deviceids
[hash
]);
943 spin_unlock(&c
->dc_lock
);
944 dprintk("%s [new]\n", __func__
);
947 EXPORT_SYMBOL_GPL(pnfs_add_deviceid
);
950 pnfs_put_deviceid_cache(struct nfs_client
*clp
)
952 struct pnfs_deviceid_cache
*local
= clp
->cl_devid_cache
;
954 dprintk("--> %s cl_devid_cache %p\n", __func__
, clp
->cl_devid_cache
);
955 if (atomic_dec_and_lock(&local
->dc_ref
, &clp
->cl_lock
)) {
957 /* Verify cache is empty */
958 for (i
= 0; i
< NFS4_DEVICE_ID_HASH_SIZE
; i
++)
959 BUG_ON(!hlist_empty(&local
->dc_deviceids
[i
]));
960 clp
->cl_devid_cache
= NULL
;
961 spin_unlock(&clp
->cl_lock
);
965 EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache
);