2 * pNFS functions to call and manage layout drivers.
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
8 * Dean Hildebrand <dhildebz@umich.edu>
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
30 #include <linux/nfs_fs.h>
34 #define NFSDBG_FACILITY NFSDBG_PNFS
39 * protects pnfs_modules_tbl.
41 static DEFINE_SPINLOCK(pnfs_spinlock
);
44 * pnfs_modules_tbl holds all pnfs modules
46 static LIST_HEAD(pnfs_modules_tbl
);
48 /* Return the registered pnfs layout driver module matching given id */
49 static struct pnfs_layoutdriver_type
*
50 find_pnfs_driver_locked(u32 id
)
52 struct pnfs_layoutdriver_type
*local
;
54 list_for_each_entry(local
, &pnfs_modules_tbl
, pnfs_tblid
)
59 dprintk("%s: Searching for id %u, found %p\n", __func__
, id
, local
);
63 static struct pnfs_layoutdriver_type
*
64 find_pnfs_driver(u32 id
)
66 struct pnfs_layoutdriver_type
*local
;
68 spin_lock(&pnfs_spinlock
);
69 local
= find_pnfs_driver_locked(id
);
70 spin_unlock(&pnfs_spinlock
);
75 unset_pnfs_layoutdriver(struct nfs_server
*nfss
)
77 if (nfss
->pnfs_curr_ld
) {
78 nfss
->pnfs_curr_ld
->clear_layoutdriver(nfss
);
79 module_put(nfss
->pnfs_curr_ld
->owner
);
81 nfss
->pnfs_curr_ld
= NULL
;
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
91 set_pnfs_layoutdriver(struct nfs_server
*server
, u32 id
)
93 struct pnfs_layoutdriver_type
*ld_type
= NULL
;
97 if (!(server
->nfs_client
->cl_exchange_flags
&
98 (EXCHGID4_FLAG_USE_NON_PNFS
| EXCHGID4_FLAG_USE_PNFS_MDS
))) {
99 printk(KERN_ERR
"%s: id %u cl_exchange_flags 0x%x\n", __func__
,
100 id
, server
->nfs_client
->cl_exchange_flags
);
103 ld_type
= find_pnfs_driver(id
);
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX
, id
);
106 ld_type
= find_pnfs_driver(id
);
108 dprintk("%s: No pNFS module found for %u.\n",
113 if (!try_module_get(ld_type
->owner
)) {
114 dprintk("%s: Could not grab reference on module\n", __func__
);
117 server
->pnfs_curr_ld
= ld_type
;
118 if (ld_type
->set_layoutdriver(server
)) {
120 "%s: Error initializing mount point for layout driver %u.\n",
122 module_put(ld_type
->owner
);
125 dprintk("%s: pNFS module for %u set\n", __func__
, id
);
129 dprintk("%s: Using NFSv4 I/O\n", __func__
);
130 server
->pnfs_curr_ld
= NULL
;
134 pnfs_register_layoutdriver(struct pnfs_layoutdriver_type
*ld_type
)
136 int status
= -EINVAL
;
137 struct pnfs_layoutdriver_type
*tmp
;
139 if (ld_type
->id
== 0) {
140 printk(KERN_ERR
"%s id 0 is reserved\n", __func__
);
143 if (!ld_type
->alloc_lseg
|| !ld_type
->free_lseg
) {
144 printk(KERN_ERR
"%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__
);
149 spin_lock(&pnfs_spinlock
);
150 tmp
= find_pnfs_driver_locked(ld_type
->id
);
152 list_add(&ld_type
->pnfs_tblid
, &pnfs_modules_tbl
);
154 dprintk("%s Registering id:%u name:%s\n", __func__
, ld_type
->id
,
157 printk(KERN_ERR
"%s Module with id %d already loaded!\n",
158 __func__
, ld_type
->id
);
160 spin_unlock(&pnfs_spinlock
);
164 EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver
);
167 pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type
*ld_type
)
169 dprintk("%s Deregistering id:%u\n", __func__
, ld_type
->id
);
170 spin_lock(&pnfs_spinlock
);
171 list_del(&ld_type
->pnfs_tblid
);
172 spin_unlock(&pnfs_spinlock
);
174 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver
);
177 * pNFS client layout cache
180 /* Need to hold i_lock if caller does not already hold reference */
182 get_layout_hdr(struct pnfs_layout_hdr
*lo
)
184 atomic_inc(&lo
->plh_refcount
);
188 destroy_layout_hdr(struct pnfs_layout_hdr
*lo
)
190 dprintk("%s: freeing layout cache %p\n", __func__
, lo
);
191 BUG_ON(!list_empty(&lo
->plh_layouts
));
192 NFS_I(lo
->plh_inode
)->layout
= NULL
;
197 put_layout_hdr_locked(struct pnfs_layout_hdr
*lo
)
199 if (atomic_dec_and_test(&lo
->plh_refcount
))
200 destroy_layout_hdr(lo
);
204 put_layout_hdr(struct pnfs_layout_hdr
*lo
)
206 struct inode
*inode
= lo
->plh_inode
;
208 if (atomic_dec_and_lock(&lo
->plh_refcount
, &inode
->i_lock
)) {
209 destroy_layout_hdr(lo
);
210 spin_unlock(&inode
->i_lock
);
215 init_lseg(struct pnfs_layout_hdr
*lo
, struct pnfs_layout_segment
*lseg
)
217 INIT_LIST_HEAD(&lseg
->pls_list
);
218 atomic_set(&lseg
->pls_refcount
, 1);
220 set_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
);
221 lseg
->pls_layout
= lo
;
224 static void free_lseg(struct pnfs_layout_segment
*lseg
)
226 struct inode
*ino
= lseg
->pls_layout
->plh_inode
;
228 NFS_SERVER(ino
)->pnfs_curr_ld
->free_lseg(lseg
);
229 /* Matched by get_layout_hdr in pnfs_insert_layout */
230 put_layout_hdr(NFS_I(ino
)->layout
);
234 put_lseg_common(struct pnfs_layout_segment
*lseg
)
236 struct inode
*inode
= lseg
->pls_layout
->plh_inode
;
238 BUG_ON(test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
));
239 list_del_init(&lseg
->pls_list
);
240 if (list_empty(&lseg
->pls_layout
->plh_segs
)) {
241 set_bit(NFS_LAYOUT_DESTROYED
, &lseg
->pls_layout
->plh_flags
);
242 /* Matched by initial refcount set in alloc_init_layout_hdr */
243 put_layout_hdr_locked(lseg
->pls_layout
);
245 rpc_wake_up(&NFS_SERVER(inode
)->roc_rpcwaitq
);
249 put_lseg(struct pnfs_layout_segment
*lseg
)
256 dprintk("%s: lseg %p ref %d valid %d\n", __func__
, lseg
,
257 atomic_read(&lseg
->pls_refcount
),
258 test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
));
259 inode
= lseg
->pls_layout
->plh_inode
;
260 if (atomic_dec_and_lock(&lseg
->pls_refcount
, &inode
->i_lock
)) {
263 put_lseg_common(lseg
);
264 list_add(&lseg
->pls_list
, &free_me
);
265 spin_unlock(&inode
->i_lock
);
266 pnfs_free_lseg_list(&free_me
);
271 should_free_lseg(u32 lseg_iomode
, u32 recall_iomode
)
273 return (recall_iomode
== IOMODE_ANY
||
274 lseg_iomode
== recall_iomode
);
277 /* Returns 1 if lseg is removed from list, 0 otherwise */
278 static int mark_lseg_invalid(struct pnfs_layout_segment
*lseg
,
279 struct list_head
*tmp_list
)
283 if (test_and_clear_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
)) {
284 /* Remove the reference keeping the lseg in the
285 * list. It will now be removed when all
286 * outstanding io is finished.
288 dprintk("%s: lseg %p ref %d\n", __func__
, lseg
,
289 atomic_read(&lseg
->pls_refcount
));
290 if (atomic_dec_and_test(&lseg
->pls_refcount
)) {
291 put_lseg_common(lseg
);
292 list_add(&lseg
->pls_list
, tmp_list
);
299 /* Returns count of number of matching invalid lsegs remaining in list
303 mark_matching_lsegs_invalid(struct pnfs_layout_hdr
*lo
,
304 struct list_head
*tmp_list
,
307 struct pnfs_layout_segment
*lseg
, *next
;
308 int invalid
= 0, removed
= 0;
310 dprintk("%s:Begin lo %p\n", __func__
, lo
);
312 if (list_empty(&lo
->plh_segs
)) {
313 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED
, &lo
->plh_flags
))
314 put_layout_hdr_locked(lo
);
317 list_for_each_entry_safe(lseg
, next
, &lo
->plh_segs
, pls_list
)
318 if (should_free_lseg(lseg
->pls_range
.iomode
, iomode
)) {
319 dprintk("%s: freeing lseg %p iomode %d "
320 "offset %llu length %llu\n", __func__
,
321 lseg
, lseg
->pls_range
.iomode
, lseg
->pls_range
.offset
,
322 lseg
->pls_range
.length
);
324 removed
+= mark_lseg_invalid(lseg
, tmp_list
);
326 dprintk("%s:Return %i\n", __func__
, invalid
- removed
);
327 return invalid
- removed
;
330 /* note free_me must contain lsegs from a single layout_hdr */
332 pnfs_free_lseg_list(struct list_head
*free_me
)
334 struct pnfs_layout_segment
*lseg
, *tmp
;
335 struct pnfs_layout_hdr
*lo
;
337 if (list_empty(free_me
))
340 lo
= list_first_entry(free_me
, struct pnfs_layout_segment
,
341 pls_list
)->pls_layout
;
343 if (test_bit(NFS_LAYOUT_DESTROYED
, &lo
->plh_flags
)) {
344 struct nfs_client
*clp
;
346 clp
= NFS_SERVER(lo
->plh_inode
)->nfs_client
;
347 spin_lock(&clp
->cl_lock
);
348 list_del_init(&lo
->plh_layouts
);
349 spin_unlock(&clp
->cl_lock
);
351 list_for_each_entry_safe(lseg
, tmp
, free_me
, pls_list
) {
352 list_del(&lseg
->pls_list
);
358 pnfs_destroy_layout(struct nfs_inode
*nfsi
)
360 struct pnfs_layout_hdr
*lo
;
363 spin_lock(&nfsi
->vfs_inode
.i_lock
);
366 lo
->plh_block_lgets
++; /* permanently block new LAYOUTGETs */
367 mark_matching_lsegs_invalid(lo
, &tmp_list
, IOMODE_ANY
);
369 spin_unlock(&nfsi
->vfs_inode
.i_lock
);
370 pnfs_free_lseg_list(&tmp_list
);
374 * Called by the state manager to remove all layouts established under an
378 pnfs_destroy_all_layouts(struct nfs_client
*clp
)
380 struct pnfs_layout_hdr
*lo
;
383 spin_lock(&clp
->cl_lock
);
384 list_splice_init(&clp
->cl_layouts
, &tmp_list
);
385 spin_unlock(&clp
->cl_lock
);
387 while (!list_empty(&tmp_list
)) {
388 lo
= list_entry(tmp_list
.next
, struct pnfs_layout_hdr
,
390 dprintk("%s freeing layout for inode %lu\n", __func__
,
391 lo
->plh_inode
->i_ino
);
392 pnfs_destroy_layout(NFS_I(lo
->plh_inode
));
396 /* update lo->plh_stateid with new if it is more recent */
398 pnfs_set_layout_stateid(struct pnfs_layout_hdr
*lo
, const nfs4_stateid
*new,
403 oldseq
= be32_to_cpu(lo
->plh_stateid
.stateid
.seqid
);
404 newseq
= be32_to_cpu(new->stateid
.seqid
);
405 if ((int)(newseq
- oldseq
) > 0) {
406 memcpy(&lo
->plh_stateid
, &new->stateid
, sizeof(new->stateid
));
407 if (update_barrier
) {
408 u32 new_barrier
= be32_to_cpu(new->stateid
.seqid
);
410 if ((int)(new_barrier
- lo
->plh_barrier
))
411 lo
->plh_barrier
= new_barrier
;
413 /* Because of wraparound, we want to keep the barrier
414 * "close" to the current seqids. It needs to be
415 * within 2**31 to count as "behind", so if it
416 * gets too near that limit, give us a little leeway
417 * and bring it to within 2**30.
418 * NOTE - and yes, this is all unsigned arithmetic.
420 if (unlikely((newseq
- lo
->plh_barrier
) > (3 << 29)))
421 lo
->plh_barrier
= newseq
- (1 << 30);
426 /* lget is set to 1 if called from inside send_layoutget call chain */
428 pnfs_layoutgets_blocked(struct pnfs_layout_hdr
*lo
, nfs4_stateid
*stateid
,
432 (int)(lo
->plh_barrier
- be32_to_cpu(stateid
->stateid
.seqid
)) >= 0)
434 return lo
->plh_block_lgets
||
435 test_bit(NFS_LAYOUT_DESTROYED
, &lo
->plh_flags
) ||
436 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
) ||
437 (list_empty(&lo
->plh_segs
) &&
438 (atomic_read(&lo
->plh_outstanding
) > lget
));
442 pnfs_choose_layoutget_stateid(nfs4_stateid
*dst
, struct pnfs_layout_hdr
*lo
,
443 struct nfs4_state
*open_state
)
447 dprintk("--> %s\n", __func__
);
448 spin_lock(&lo
->plh_inode
->i_lock
);
449 if (pnfs_layoutgets_blocked(lo
, NULL
, 1)) {
451 } else if (list_empty(&lo
->plh_segs
)) {
455 seq
= read_seqbegin(&open_state
->seqlock
);
456 memcpy(dst
->data
, open_state
->stateid
.data
,
457 sizeof(open_state
->stateid
.data
));
458 } while (read_seqretry(&open_state
->seqlock
, seq
));
460 memcpy(dst
->data
, lo
->plh_stateid
.data
, sizeof(lo
->plh_stateid
.data
));
461 spin_unlock(&lo
->plh_inode
->i_lock
);
462 dprintk("<-- %s\n", __func__
);
467 * Get layout from server.
468 * for now, assume that whole file layouts are requested.
470 * arg->length: all ones
472 static struct pnfs_layout_segment
*
473 send_layoutget(struct pnfs_layout_hdr
*lo
,
474 struct nfs_open_context
*ctx
,
477 struct inode
*ino
= lo
->plh_inode
;
478 struct nfs_server
*server
= NFS_SERVER(ino
);
479 struct nfs4_layoutget
*lgp
;
480 struct pnfs_layout_segment
*lseg
= NULL
;
482 dprintk("--> %s\n", __func__
);
485 lgp
= kzalloc(sizeof(*lgp
), GFP_KERNEL
);
488 lgp
->args
.minlength
= NFS4_MAX_UINT64
;
489 lgp
->args
.maxcount
= PNFS_LAYOUT_MAXSIZE
;
490 lgp
->args
.range
.iomode
= iomode
;
491 lgp
->args
.range
.offset
= 0;
492 lgp
->args
.range
.length
= NFS4_MAX_UINT64
;
493 lgp
->args
.type
= server
->pnfs_curr_ld
->id
;
494 lgp
->args
.inode
= ino
;
495 lgp
->args
.ctx
= get_nfs_open_context(ctx
);
498 /* Synchronously retrieve layout information from server and
501 nfs4_proc_layoutget(lgp
);
503 /* remember that LAYOUTGET failed and suspend trying */
504 set_bit(lo_fail_bit(iomode
), &lo
->plh_flags
);
509 bool pnfs_roc(struct inode
*ino
)
511 struct pnfs_layout_hdr
*lo
;
512 struct pnfs_layout_segment
*lseg
, *tmp
;
516 spin_lock(&ino
->i_lock
);
517 lo
= NFS_I(ino
)->layout
;
518 if (!lo
|| !test_and_clear_bit(NFS_LAYOUT_ROC
, &lo
->plh_flags
) ||
519 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
))
521 list_for_each_entry_safe(lseg
, tmp
, &lo
->plh_segs
, pls_list
)
522 if (test_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
)) {
523 mark_lseg_invalid(lseg
, &tmp_list
);
528 lo
->plh_block_lgets
++;
529 get_layout_hdr(lo
); /* matched in pnfs_roc_release */
530 spin_unlock(&ino
->i_lock
);
531 pnfs_free_lseg_list(&tmp_list
);
535 spin_unlock(&ino
->i_lock
);
539 void pnfs_roc_release(struct inode
*ino
)
541 struct pnfs_layout_hdr
*lo
;
543 spin_lock(&ino
->i_lock
);
544 lo
= NFS_I(ino
)->layout
;
545 lo
->plh_block_lgets
--;
546 put_layout_hdr_locked(lo
);
547 spin_unlock(&ino
->i_lock
);
550 void pnfs_roc_set_barrier(struct inode
*ino
, u32 barrier
)
552 struct pnfs_layout_hdr
*lo
;
554 spin_lock(&ino
->i_lock
);
555 lo
= NFS_I(ino
)->layout
;
556 if ((int)(barrier
- lo
->plh_barrier
) > 0)
557 lo
->plh_barrier
= barrier
;
558 spin_unlock(&ino
->i_lock
);
561 bool pnfs_roc_drain(struct inode
*ino
, u32
*barrier
)
563 struct nfs_inode
*nfsi
= NFS_I(ino
);
564 struct pnfs_layout_segment
*lseg
;
567 spin_lock(&ino
->i_lock
);
568 list_for_each_entry(lseg
, &nfsi
->layout
->plh_segs
, pls_list
)
569 if (test_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
)) {
574 struct pnfs_layout_hdr
*lo
= nfsi
->layout
;
575 u32 current_seqid
= be32_to_cpu(lo
->plh_stateid
.stateid
.seqid
);
577 /* Since close does not return a layout stateid for use as
578 * a barrier, we choose the worst-case barrier.
580 *barrier
= current_seqid
+ atomic_read(&lo
->plh_outstanding
);
582 spin_unlock(&ino
->i_lock
);
587 * Compare two layout segments for sorting into layout cache.
588 * We want to preferentially return RW over RO layouts, so ensure those
592 cmp_layout(u32 iomode1
, u32 iomode2
)
594 /* read > read/write */
595 return (int)(iomode2
== IOMODE_READ
) - (int)(iomode1
== IOMODE_READ
);
599 pnfs_insert_layout(struct pnfs_layout_hdr
*lo
,
600 struct pnfs_layout_segment
*lseg
)
602 struct pnfs_layout_segment
*lp
;
605 dprintk("%s:Begin\n", __func__
);
607 assert_spin_locked(&lo
->plh_inode
->i_lock
);
608 list_for_each_entry(lp
, &lo
->plh_segs
, pls_list
) {
609 if (cmp_layout(lp
->pls_range
.iomode
, lseg
->pls_range
.iomode
) > 0)
611 list_add_tail(&lseg
->pls_list
, &lp
->pls_list
);
612 dprintk("%s: inserted lseg %p "
613 "iomode %d offset %llu length %llu before "
614 "lp %p iomode %d offset %llu length %llu\n",
615 __func__
, lseg
, lseg
->pls_range
.iomode
,
616 lseg
->pls_range
.offset
, lseg
->pls_range
.length
,
617 lp
, lp
->pls_range
.iomode
, lp
->pls_range
.offset
,
618 lp
->pls_range
.length
);
623 list_add_tail(&lseg
->pls_list
, &lo
->plh_segs
);
624 dprintk("%s: inserted lseg %p "
625 "iomode %d offset %llu length %llu at tail\n",
626 __func__
, lseg
, lseg
->pls_range
.iomode
,
627 lseg
->pls_range
.offset
, lseg
->pls_range
.length
);
631 dprintk("%s:Return\n", __func__
);
634 static struct pnfs_layout_hdr
*
635 alloc_init_layout_hdr(struct inode
*ino
)
637 struct pnfs_layout_hdr
*lo
;
639 lo
= kzalloc(sizeof(struct pnfs_layout_hdr
), GFP_KERNEL
);
642 atomic_set(&lo
->plh_refcount
, 1);
643 INIT_LIST_HEAD(&lo
->plh_layouts
);
644 INIT_LIST_HEAD(&lo
->plh_segs
);
645 INIT_LIST_HEAD(&lo
->plh_bulk_recall
);
650 static struct pnfs_layout_hdr
*
651 pnfs_find_alloc_layout(struct inode
*ino
)
653 struct nfs_inode
*nfsi
= NFS_I(ino
);
654 struct pnfs_layout_hdr
*new = NULL
;
656 dprintk("%s Begin ino=%p layout=%p\n", __func__
, ino
, nfsi
->layout
);
658 assert_spin_locked(&ino
->i_lock
);
660 if (test_bit(NFS_LAYOUT_DESTROYED
, &nfsi
->layout
->plh_flags
))
665 spin_unlock(&ino
->i_lock
);
666 new = alloc_init_layout_hdr(ino
);
667 spin_lock(&ino
->i_lock
);
669 if (likely(nfsi
->layout
== NULL
)) /* Won the race? */
677 * iomode matching rules:
688 is_matching_lseg(struct pnfs_layout_segment
*lseg
, u32 iomode
)
690 return (iomode
!= IOMODE_RW
|| lseg
->pls_range
.iomode
== IOMODE_RW
);
694 * lookup range in layout
696 static struct pnfs_layout_segment
*
697 pnfs_find_lseg(struct pnfs_layout_hdr
*lo
, u32 iomode
)
699 struct pnfs_layout_segment
*lseg
, *ret
= NULL
;
701 dprintk("%s:Begin\n", __func__
);
703 assert_spin_locked(&lo
->plh_inode
->i_lock
);
704 list_for_each_entry(lseg
, &lo
->plh_segs
, pls_list
) {
705 if (test_bit(NFS_LSEG_VALID
, &lseg
->pls_flags
) &&
706 is_matching_lseg(lseg
, iomode
)) {
707 ret
= get_lseg(lseg
);
710 if (cmp_layout(iomode
, lseg
->pls_range
.iomode
) > 0)
714 dprintk("%s:Return lseg %p ref %d\n",
715 __func__
, ret
, ret
? atomic_read(&ret
->pls_refcount
) : 0);
720 * Layout segment is retrieved from the server if not cached.
721 * The appropriate layout segment is referenced and returned to the caller.
723 struct pnfs_layout_segment
*
724 pnfs_update_layout(struct inode
*ino
,
725 struct nfs_open_context
*ctx
,
726 enum pnfs_iomode iomode
)
728 struct nfs_inode
*nfsi
= NFS_I(ino
);
729 struct nfs_client
*clp
= NFS_SERVER(ino
)->nfs_client
;
730 struct pnfs_layout_hdr
*lo
;
731 struct pnfs_layout_segment
*lseg
= NULL
;
734 if (!pnfs_enabled_sb(NFS_SERVER(ino
)))
736 spin_lock(&ino
->i_lock
);
737 lo
= pnfs_find_alloc_layout(ino
);
739 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__
);
743 /* Do we even need to bother with this? */
744 if (test_bit(NFS4CLNT_LAYOUTRECALL
, &clp
->cl_state
) ||
745 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
)) {
746 dprintk("%s matches recall, use MDS\n", __func__
);
749 /* Check to see if the layout for the given range already exists */
750 lseg
= pnfs_find_lseg(lo
, iomode
);
754 /* if LAYOUTGET already failed once we don't try again */
755 if (test_bit(lo_fail_bit(iomode
), &nfsi
->layout
->plh_flags
))
758 if (pnfs_layoutgets_blocked(lo
, NULL
, 0))
760 atomic_inc(&lo
->plh_outstanding
);
763 if (list_empty(&lo
->plh_segs
))
765 spin_unlock(&ino
->i_lock
);
767 /* The lo must be on the clp list if there is any
768 * chance of a CB_LAYOUTRECALL(FILE) coming in.
770 spin_lock(&clp
->cl_lock
);
771 BUG_ON(!list_empty(&lo
->plh_layouts
));
772 list_add_tail(&lo
->plh_layouts
, &clp
->cl_layouts
);
773 spin_unlock(&clp
->cl_lock
);
776 lseg
= send_layoutget(lo
, ctx
, iomode
);
777 if (!lseg
&& first
) {
778 spin_lock(&clp
->cl_lock
);
779 list_del_init(&lo
->plh_layouts
);
780 spin_unlock(&clp
->cl_lock
);
782 atomic_dec(&lo
->plh_outstanding
);
785 dprintk("%s end, state 0x%lx lseg %p\n", __func__
,
786 nfsi
->layout
? nfsi
->layout
->plh_flags
: -1, lseg
);
789 spin_unlock(&ino
->i_lock
);
794 pnfs_layout_process(struct nfs4_layoutget
*lgp
)
796 struct pnfs_layout_hdr
*lo
= NFS_I(lgp
->args
.inode
)->layout
;
797 struct nfs4_layoutget_res
*res
= &lgp
->res
;
798 struct pnfs_layout_segment
*lseg
;
799 struct inode
*ino
= lo
->plh_inode
;
800 struct nfs_client
*clp
= NFS_SERVER(ino
)->nfs_client
;
803 /* Verify we got what we asked for.
804 * Note that because the xdr parsing only accepts a single
805 * element array, this can fail even if the server is behaving
808 if (lgp
->args
.range
.iomode
> res
->range
.iomode
||
809 res
->range
.offset
!= 0 ||
810 res
->range
.length
!= NFS4_MAX_UINT64
) {
814 /* Inject layout blob into I/O device driver */
815 lseg
= NFS_SERVER(ino
)->pnfs_curr_ld
->alloc_lseg(lo
, res
);
816 if (!lseg
|| IS_ERR(lseg
)) {
820 status
= PTR_ERR(lseg
);
821 dprintk("%s: Could not allocate layout: error %d\n",
826 spin_lock(&ino
->i_lock
);
827 if (test_bit(NFS4CLNT_LAYOUTRECALL
, &clp
->cl_state
) ||
828 test_bit(NFS_LAYOUT_BULK_RECALL
, &lo
->plh_flags
)) {
829 dprintk("%s forget reply due to recall\n", __func__
);
830 goto out_forget_reply
;
833 if (pnfs_layoutgets_blocked(lo
, &res
->stateid
, 1)) {
834 dprintk("%s forget reply due to state\n", __func__
);
835 goto out_forget_reply
;
838 lseg
->pls_range
= res
->range
;
839 *lgp
->lsegpp
= get_lseg(lseg
);
840 pnfs_insert_layout(lo
, lseg
);
842 if (res
->return_on_close
) {
843 set_bit(NFS_LSEG_ROC
, &lseg
->pls_flags
);
844 set_bit(NFS_LAYOUT_ROC
, &lo
->plh_flags
);
847 /* Done processing layoutget. Set the layout stateid */
848 pnfs_set_layout_stateid(lo
, &res
->stateid
, false);
849 spin_unlock(&ino
->i_lock
);
854 spin_unlock(&ino
->i_lock
);
855 lseg
->pls_layout
= lo
;
856 NFS_SERVER(ino
)->pnfs_curr_ld
->free_lseg(lseg
);
860 static int pnfs_read_pg_test(struct nfs_pageio_descriptor
*pgio
,
861 struct nfs_page
*prev
,
862 struct nfs_page
*req
)
864 if (pgio
->pg_count
== prev
->wb_bytes
) {
865 /* This is the first coalesce call for a series of nfs_pages */
866 pgio
->pg_lseg
= pnfs_update_layout(pgio
->pg_inode
,
870 return NFS_SERVER(pgio
->pg_inode
)->pnfs_curr_ld
->pg_test(pgio
, prev
, req
);
874 pnfs_pageio_init_read(struct nfs_pageio_descriptor
*pgio
, struct inode
*inode
)
876 struct pnfs_layoutdriver_type
*ld
;
878 ld
= NFS_SERVER(inode
)->pnfs_curr_ld
;
879 pgio
->pg_test
= (ld
&& ld
->pg_test
) ? pnfs_read_pg_test
: NULL
;
883 * Device ID cache. Currently supports one layout type per struct nfs_client.
884 * Add layout type to the lookup key to expand to support multiple types.
887 pnfs_alloc_init_deviceid_cache(struct nfs_client
*clp
,
888 void (*free_callback
)(struct pnfs_deviceid_node
*))
890 struct pnfs_deviceid_cache
*c
;
892 c
= kzalloc(sizeof(struct pnfs_deviceid_cache
), GFP_KERNEL
);
895 spin_lock(&clp
->cl_lock
);
896 if (clp
->cl_devid_cache
!= NULL
) {
897 atomic_inc(&clp
->cl_devid_cache
->dc_ref
);
898 dprintk("%s [kref [%d]]\n", __func__
,
899 atomic_read(&clp
->cl_devid_cache
->dc_ref
));
902 /* kzalloc initializes hlists */
903 spin_lock_init(&c
->dc_lock
);
904 atomic_set(&c
->dc_ref
, 1);
905 c
->dc_free_callback
= free_callback
;
906 clp
->cl_devid_cache
= c
;
907 dprintk("%s [new]\n", __func__
);
909 spin_unlock(&clp
->cl_lock
);
912 EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache
);
915 * Called from pnfs_layoutdriver_type->free_lseg
916 * last layout segment reference frees deviceid
919 pnfs_put_deviceid(struct pnfs_deviceid_cache
*c
,
920 struct pnfs_deviceid_node
*devid
)
922 struct nfs4_deviceid
*id
= &devid
->de_id
;
923 struct pnfs_deviceid_node
*d
;
924 struct hlist_node
*n
;
925 long h
= nfs4_deviceid_hash(id
);
927 dprintk("%s [%d]\n", __func__
, atomic_read(&devid
->de_ref
));
928 if (!atomic_dec_and_lock(&devid
->de_ref
, &c
->dc_lock
))
931 hlist_for_each_entry_rcu(d
, n
, &c
->dc_deviceids
[h
], de_node
)
932 if (!memcmp(&d
->de_id
, id
, sizeof(*id
))) {
933 hlist_del_rcu(&d
->de_node
);
934 spin_unlock(&c
->dc_lock
);
936 c
->dc_free_callback(devid
);
939 spin_unlock(&c
->dc_lock
);
940 /* Why wasn't it found in the list? */
943 EXPORT_SYMBOL_GPL(pnfs_put_deviceid
);
945 /* Find and reference a deviceid */
946 struct pnfs_deviceid_node
*
947 pnfs_find_get_deviceid(struct pnfs_deviceid_cache
*c
, struct nfs4_deviceid
*id
)
949 struct pnfs_deviceid_node
*d
;
950 struct hlist_node
*n
;
951 long hash
= nfs4_deviceid_hash(id
);
953 dprintk("--> %s hash %ld\n", __func__
, hash
);
955 hlist_for_each_entry_rcu(d
, n
, &c
->dc_deviceids
[hash
], de_node
) {
956 if (!memcmp(&d
->de_id
, id
, sizeof(*id
))) {
957 if (!atomic_inc_not_zero(&d
->de_ref
)) {
969 EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid
);
972 * Add a deviceid to the cache.
973 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
975 struct pnfs_deviceid_node
*
976 pnfs_add_deviceid(struct pnfs_deviceid_cache
*c
, struct pnfs_deviceid_node
*new)
978 struct pnfs_deviceid_node
*d
;
979 long hash
= nfs4_deviceid_hash(&new->de_id
);
981 dprintk("--> %s hash %ld\n", __func__
, hash
);
982 spin_lock(&c
->dc_lock
);
983 d
= pnfs_find_get_deviceid(c
, &new->de_id
);
985 spin_unlock(&c
->dc_lock
);
986 dprintk("%s [discard]\n", __func__
);
987 c
->dc_free_callback(new);
990 INIT_HLIST_NODE(&new->de_node
);
991 atomic_set(&new->de_ref
, 1);
992 hlist_add_head_rcu(&new->de_node
, &c
->dc_deviceids
[hash
]);
993 spin_unlock(&c
->dc_lock
);
994 dprintk("%s [new]\n", __func__
);
997 EXPORT_SYMBOL_GPL(pnfs_add_deviceid
);
1000 pnfs_put_deviceid_cache(struct nfs_client
*clp
)
1002 struct pnfs_deviceid_cache
*local
= clp
->cl_devid_cache
;
1004 dprintk("--> %s ({%d})\n", __func__
, atomic_read(&local
->dc_ref
));
1005 if (atomic_dec_and_lock(&local
->dc_ref
, &clp
->cl_lock
)) {
1007 /* Verify cache is empty */
1008 for (i
= 0; i
< NFS4_DEVICE_ID_HASH_SIZE
; i
++)
1009 BUG_ON(!hlist_empty(&local
->dc_deviceids
[i
]));
1010 clp
->cl_devid_cache
= NULL
;
1011 spin_unlock(&clp
->cl_lock
);
1015 EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache
);