2 * pNFS Objects layout implementation over open-osd initiator library
4 * Copyright (C) 2009 Panasas Inc. [year of first publication]
7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <bharrosh@panasas.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * See the file COPYING included with this distribution for more details.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the Panasas company nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <linux/module.h>
41 #include <scsi/osd_initiator.h>
43 #include "objlayout.h"
45 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
 * Cast an integer expression to unsigned long long so it can be printed
 * with %llx / %llu.  The argument is parenthesized so that the cast applies
 * to the whole expression and not only to its first operand.
 */
#define _LLU(x) ((unsigned long long)(x))
49 enum { BIO_MAX_PAGES_KMALLOC
=
50 (PAGE_SIZE
- sizeof(struct bio
)) / sizeof(struct bio_vec
),
53 struct objio_dev_ent
{
54 struct nfs4_deviceid_node id_node
;
59 objio_free_deviceid_node(struct nfs4_deviceid_node
*d
)
61 struct objio_dev_ent
*de
= container_of(d
, struct objio_dev_ent
, id_node
);
63 dprintk("%s: free od=%p\n", __func__
, de
->od
);
64 osduld_put_device(de
->od
);
68 static struct objio_dev_ent
*_dev_list_find(const struct nfs_server
*nfss
,
69 const struct nfs4_deviceid
*d_id
)
71 struct nfs4_deviceid_node
*d
;
72 struct objio_dev_ent
*de
;
74 d
= nfs4_find_get_deviceid(nfss
->pnfs_curr_ld
, nfss
->nfs_client
, d_id
);
78 de
= container_of(d
, struct objio_dev_ent
, id_node
);
82 static struct objio_dev_ent
*
83 _dev_list_add(const struct nfs_server
*nfss
,
84 const struct nfs4_deviceid
*d_id
, struct osd_dev
*od
,
87 struct nfs4_deviceid_node
*d
;
88 struct objio_dev_ent
*de
= kzalloc(sizeof(*de
), gfp_flags
);
89 struct objio_dev_ent
*n
;
92 dprintk("%s: -ENOMEM od=%p\n", __func__
, od
);
96 dprintk("%s: Adding od=%p\n", __func__
, od
);
97 nfs4_init_deviceid_node(&de
->id_node
,
103 d
= nfs4_insert_deviceid_node(&de
->id_node
);
104 n
= container_of(d
, struct objio_dev_ent
, id_node
);
106 dprintk("%s: Race with other n->od=%p\n", __func__
, n
->od
);
107 objio_free_deviceid_node(&de
->id_node
);
114 struct caps_buffers
{
115 u8 caps_key
[OSD_CRYPTO_KEYID_SIZE
];
116 u8 creds
[OSD_CAP_LEN
];
119 struct objio_segment
{
120 struct pnfs_layout_segment lseg
;
122 struct pnfs_osd_object_cred
*comps
;
125 unsigned stripe_unit
;
126 unsigned group_width
; /* Data stripe_units without integrity comps */
128 unsigned group_count
;
130 unsigned max_io_size
;
132 unsigned comps_index
;
134 /* variable length */
135 struct objio_dev_ent
*ods
[];
138 static inline struct objio_segment
*
139 OBJIO_LSEG(struct pnfs_layout_segment
*lseg
)
141 return container_of(lseg
, struct objio_segment
, lseg
);
145 typedef int (*objio_done_fn
)(struct objio_state
*ios
);
149 struct objlayout_io_res oir
;
158 struct objio_segment
*layout
;
164 unsigned long length
;
165 unsigned numdevs
; /* Actually used devs in this IO */
166 /* A per-device variable array of size numdevs */
167 struct _objio_per_comp
{
169 struct osd_request
*or;
170 unsigned long length
;
176 /* Send and wait for a get_device_info of devices in the layout,
177 then look them up with the osd_initiator library */
178 static struct objio_dev_ent
*_device_lookup(struct pnfs_layout_hdr
*pnfslay
,
179 struct objio_segment
*objio_seg
, unsigned comp
,
182 struct pnfs_osd_deviceaddr
*deviceaddr
;
183 struct nfs4_deviceid
*d_id
;
184 struct objio_dev_ent
*ode
;
186 struct osd_dev_info odi
;
189 d_id
= &objio_seg
->comps
[comp
].oc_object_id
.oid_device_id
;
191 ode
= _dev_list_find(NFS_SERVER(pnfslay
->plh_inode
), d_id
);
195 err
= objlayout_get_deviceinfo(pnfslay
, d_id
, &deviceaddr
, gfp_flags
);
197 dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
198 __func__
, _DEVID_LO(d_id
), _DEVID_HI(d_id
), err
);
202 odi
.systemid_len
= deviceaddr
->oda_systemid
.len
;
203 if (odi
.systemid_len
> sizeof(odi
.systemid
)) {
206 } else if (odi
.systemid_len
)
207 memcpy(odi
.systemid
, deviceaddr
->oda_systemid
.data
,
209 odi
.osdname_len
= deviceaddr
->oda_osdname
.len
;
210 odi
.osdname
= (u8
*)deviceaddr
->oda_osdname
.data
;
212 if (!odi
.osdname_len
&& !odi
.systemid_len
) {
213 dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
219 od
= osduld_info_lookup(&odi
);
220 if (unlikely(IS_ERR(od
))) {
222 dprintk("%s: osduld_info_lookup => %d\n", __func__
, err
);
226 ode
= _dev_list_add(NFS_SERVER(pnfslay
->plh_inode
), d_id
, od
,
230 dprintk("%s: return=%d\n", __func__
, err
);
231 objlayout_put_deviceinfo(deviceaddr
);
232 return err
? ERR_PTR(err
) : ode
;
235 static int objio_devices_lookup(struct pnfs_layout_hdr
*pnfslay
,
236 struct objio_segment
*objio_seg
,
242 /* lookup all devices */
243 for (i
= 0; i
< objio_seg
->num_comps
; i
++) {
244 struct objio_dev_ent
*ode
;
246 ode
= _device_lookup(pnfslay
, objio_seg
, i
, gfp_flags
);
247 if (unlikely(IS_ERR(ode
))) {
251 objio_seg
->ods
[i
] = ode
;
256 dprintk("%s: return=%d\n", __func__
, err
);
260 static int _verify_data_map(struct pnfs_osd_layout
*layout
)
262 struct pnfs_osd_data_map
*data_map
= &layout
->olo_map
;
266 /* FIXME: Only raid0 for now. if not go through MDS */
267 if (data_map
->odm_raid_algorithm
!= PNFS_OSD_RAID_0
) {
268 printk(KERN_ERR
"Only RAID_0 for now\n");
271 if (0 != (data_map
->odm_num_comps
% (data_map
->odm_mirror_cnt
+ 1))) {
272 printk(KERN_ERR
"Data Map wrong, num_comps=%u mirrors=%u\n",
273 data_map
->odm_num_comps
, data_map
->odm_mirror_cnt
);
277 if (data_map
->odm_group_width
)
278 group_width
= data_map
->odm_group_width
;
280 group_width
= data_map
->odm_num_comps
/
281 (data_map
->odm_mirror_cnt
+ 1);
283 stripe_length
= (u64
)data_map
->odm_stripe_unit
* group_width
;
284 if (stripe_length
>= (1ULL << 32)) {
285 printk(KERN_ERR
"Total Stripe length(0x%llx)"
286 " >= 32bit is not supported\n", _LLU(stripe_length
));
290 if (0 != (data_map
->odm_stripe_unit
& ~PAGE_MASK
)) {
291 printk(KERN_ERR
"Stripe Unit(0x%llx)"
292 " must be Multples of PAGE_SIZE(0x%lx)\n",
293 _LLU(data_map
->odm_stripe_unit
), PAGE_SIZE
);
300 static void copy_single_comp(struct pnfs_osd_object_cred
*cur_comp
,
301 struct pnfs_osd_object_cred
*src_comp
,
302 struct caps_buffers
*caps_p
)
304 WARN_ON(src_comp
->oc_cap_key
.cred_len
> sizeof(caps_p
->caps_key
));
305 WARN_ON(src_comp
->oc_cap
.cred_len
> sizeof(caps_p
->creds
));
307 *cur_comp
= *src_comp
;
309 memcpy(caps_p
->caps_key
, src_comp
->oc_cap_key
.cred
,
310 sizeof(caps_p
->caps_key
));
311 cur_comp
->oc_cap_key
.cred
= caps_p
->caps_key
;
313 memcpy(caps_p
->creds
, src_comp
->oc_cap
.cred
,
314 sizeof(caps_p
->creds
));
315 cur_comp
->oc_cap
.cred
= caps_p
->creds
;
318 int objio_alloc_lseg(struct pnfs_layout_segment
**outp
,
319 struct pnfs_layout_hdr
*pnfslay
,
320 struct pnfs_layout_range
*range
,
321 struct xdr_stream
*xdr
,
324 struct objio_segment
*objio_seg
;
325 struct pnfs_osd_xdr_decode_layout_iter iter
;
326 struct pnfs_osd_layout layout
;
327 struct pnfs_osd_object_cred
*cur_comp
, src_comp
;
328 struct caps_buffers
*caps_p
;
331 err
= pnfs_osd_xdr_decode_layout_map(&layout
, &iter
, xdr
);
335 err
= _verify_data_map(&layout
);
339 objio_seg
= kzalloc(sizeof(*objio_seg
) +
340 sizeof(objio_seg
->ods
[0]) * layout
.olo_num_comps
+
341 sizeof(*objio_seg
->comps
) * layout
.olo_num_comps
+
342 sizeof(struct caps_buffers
) * layout
.olo_num_comps
,
347 objio_seg
->comps
= (void *)(objio_seg
->ods
+ layout
.olo_num_comps
);
348 cur_comp
= objio_seg
->comps
;
349 caps_p
= (void *)(cur_comp
+ layout
.olo_num_comps
);
350 while (pnfs_osd_xdr_decode_layout_comp(&src_comp
, &iter
, xdr
, &err
))
351 copy_single_comp(cur_comp
++, &src_comp
, caps_p
++);
355 objio_seg
->num_comps
= layout
.olo_num_comps
;
356 objio_seg
->comps_index
= layout
.olo_comps_index
;
357 err
= objio_devices_lookup(pnfslay
, objio_seg
, gfp_flags
);
361 objio_seg
->mirrors_p1
= layout
.olo_map
.odm_mirror_cnt
+ 1;
362 objio_seg
->stripe_unit
= layout
.olo_map
.odm_stripe_unit
;
363 if (layout
.olo_map
.odm_group_width
) {
364 objio_seg
->group_width
= layout
.olo_map
.odm_group_width
;
365 objio_seg
->group_depth
= layout
.olo_map
.odm_group_depth
;
366 objio_seg
->group_count
= layout
.olo_map
.odm_num_comps
/
367 objio_seg
->mirrors_p1
/
368 objio_seg
->group_width
;
370 objio_seg
->group_width
= layout
.olo_map
.odm_num_comps
/
371 objio_seg
->mirrors_p1
;
372 objio_seg
->group_depth
= -1;
373 objio_seg
->group_count
= 1;
376 /* Cache this calculation; it will be hit for every page */
377 objio_seg
->max_io_size
= (BIO_MAX_PAGES_KMALLOC
* PAGE_SIZE
-
378 objio_seg
->stripe_unit
) *
379 objio_seg
->group_width
;
381 *outp
= &objio_seg
->lseg
;
386 dprintk("%s: Error: return %d\n", __func__
, err
);
391 void objio_free_lseg(struct pnfs_layout_segment
*lseg
)
394 struct objio_segment
*objio_seg
= OBJIO_LSEG(lseg
);
396 for (i
= 0; i
< objio_seg
->num_comps
; i
++) {
397 if (!objio_seg
->ods
[i
])
399 nfs4_put_deviceid_node(&objio_seg
->ods
[i
]->id_node
);
405 objio_alloc_io_state(struct pnfs_layout_hdr
*pnfs_layout_type
,
406 struct pnfs_layout_segment
*lseg
, struct page
**pages
, unsigned pgbase
,
407 loff_t offset
, size_t count
, void *rpcdata
, gfp_t gfp_flags
,
408 struct objio_state
**outp
)
410 struct objio_segment
*objio_seg
= OBJIO_LSEG(lseg
);
411 struct objio_state
*ios
;
412 struct __alloc_objio_state
{
413 struct objio_state objios
;
414 struct _objio_per_comp per_dev
[objio_seg
->num_comps
];
415 struct pnfs_osd_ioerr ioerrs
[objio_seg
->num_comps
];
418 aos
= kzalloc(sizeof(*aos
), gfp_flags
);
424 ios
->layout
= objio_seg
;
425 objlayout_init_ioerrs(&aos
->objios
.oir
, objio_seg
->num_comps
,
426 aos
->ioerrs
, rpcdata
, pnfs_layout_type
);
429 ios
->pgbase
= pgbase
;
430 ios
->nr_pages
= (pgbase
+ count
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
;
431 ios
->offset
= offset
;
434 BUG_ON(ios
->nr_pages
> (pgbase
+ count
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
);
440 void objio_free_result(struct objlayout_io_res
*oir
)
442 struct objio_state
*ios
= container_of(oir
, struct objio_state
, oir
);
447 enum pnfs_osd_errno
osd_pri_2_pnfs_err(enum osd_err_priority oep
)
450 case OSD_ERR_PRI_NO_ERROR
:
451 return (enum pnfs_osd_errno
)0;
453 case OSD_ERR_PRI_CLEAR_PAGES
:
457 case OSD_ERR_PRI_RESOURCE
:
458 return PNFS_OSD_ERR_RESOURCE
;
459 case OSD_ERR_PRI_BAD_CRED
:
460 return PNFS_OSD_ERR_BAD_CRED
;
461 case OSD_ERR_PRI_NO_ACCESS
:
462 return PNFS_OSD_ERR_NO_ACCESS
;
463 case OSD_ERR_PRI_UNREACHABLE
:
464 return PNFS_OSD_ERR_UNREACHABLE
;
465 case OSD_ERR_PRI_NOT_FOUND
:
466 return PNFS_OSD_ERR_NOT_FOUND
;
467 case OSD_ERR_PRI_NO_SPACE
:
468 return PNFS_OSD_ERR_NO_SPACE
;
472 case OSD_ERR_PRI_EIO
:
473 return PNFS_OSD_ERR_EIO
;
477 static void _clear_bio(struct bio
*bio
)
482 __bio_for_each_segment(bv
, bio
, i
, 0) {
483 unsigned this_count
= bv
->bv_len
;
485 if (likely(PAGE_SIZE
== this_count
))
486 clear_highpage(bv
->bv_page
);
488 zero_user(bv
->bv_page
, bv
->bv_offset
, this_count
);
492 static int _io_check(struct objio_state
*ios
, bool is_write
)
494 enum osd_err_priority oep
= OSD_ERR_PRI_NO_ERROR
;
498 for (i
= 0; i
< ios
->numdevs
; i
++) {
499 struct osd_sense_info osi
;
500 struct osd_request
*or = ios
->per_dev
[i
].or;
506 ret
= osd_req_decode_sense(or, &osi
);
510 if (OSD_ERR_PRI_CLEAR_PAGES
== osi
.osd_err_pri
) {
511 /* start read offset past end of file */
513 _clear_bio(ios
->per_dev
[i
].bio
);
514 dprintk("%s: start read offset passed end of file "
515 "offset=0x%llx, length=0x%lx\n", __func__
,
516 _LLU(ios
->per_dev
[i
].offset
),
517 ios
->per_dev
[i
].length
);
519 continue; /* we recovered */
521 objlayout_io_set_result(&ios
->oir
, i
,
522 &ios
->layout
->comps
[i
].oc_object_id
,
523 osd_pri_2_pnfs_err(osi
.osd_err_pri
),
524 ios
->per_dev
[i
].offset
,
525 ios
->per_dev
[i
].length
,
528 if (osi
.osd_err_pri
>= oep
) {
529 oep
= osi
.osd_err_pri
;
538 * Common IO state helpers.
540 static void _io_free(struct objio_state
*ios
)
544 for (i
= 0; i
< ios
->numdevs
; i
++) {
545 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[i
];
548 osd_end_request(per_dev
->or);
553 bio_put(per_dev
->bio
);
559 struct osd_dev
*_io_od(struct objio_state
*ios
, unsigned dev
)
561 unsigned min_dev
= ios
->layout
->comps_index
;
562 unsigned max_dev
= min_dev
+ ios
->layout
->num_comps
;
564 BUG_ON(dev
< min_dev
|| max_dev
<= dev
);
565 return ios
->layout
->ods
[dev
- min_dev
]->od
;
568 struct _striping_info
{
575 static void _calc_stripe_info(struct objio_state
*ios
, u64 file_offset
,
576 struct _striping_info
*si
)
578 u32 stripe_unit
= ios
->layout
->stripe_unit
;
579 u32 group_width
= ios
->layout
->group_width
;
580 u64 group_depth
= ios
->layout
->group_depth
;
581 u32 U
= stripe_unit
* group_width
;
583 u64 T
= U
* group_depth
;
584 u64 S
= T
* ios
->layout
->group_count
;
585 u64 M
= div64_u64(file_offset
, S
);
588 G = (L - (M * S)) / T
589 H = (L - (M * S)) % T
591 u64 LmodU
= file_offset
- M
* S
;
592 u32 G
= div64_u64(LmodU
, T
);
593 u64 H
= LmodU
- G
* T
;
595 u32 N
= div_u64(H
, U
);
597 div_u64_rem(file_offset
, stripe_unit
, &si
->unit_off
);
598 si
->obj_offset
= si
->unit_off
+ (N
* stripe_unit
) +
599 (M
* group_depth
* stripe_unit
);
601 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
602 si
->dev
= (u32
)(H
- (N
* U
)) / stripe_unit
+ G
* group_width
;
603 si
->dev
*= ios
->layout
->mirrors_p1
;
605 si
->group_length
= T
- H
;
608 static int _add_stripe_unit(struct objio_state
*ios
, unsigned *cur_pg
,
609 unsigned pgbase
, struct _objio_per_comp
*per_dev
, int len
,
612 unsigned pg
= *cur_pg
;
614 struct request_queue
*q
=
615 osd_request_queue(_io_od(ios
, per_dev
->dev
));
617 if (per_dev
->bio
== NULL
) {
618 unsigned pages_in_stripe
= ios
->layout
->group_width
*
619 (ios
->layout
->stripe_unit
/ PAGE_SIZE
);
620 unsigned bio_size
= (ios
->nr_pages
+ pages_in_stripe
) /
621 ios
->layout
->group_width
;
623 if (BIO_MAX_PAGES_KMALLOC
< bio_size
)
624 bio_size
= BIO_MAX_PAGES_KMALLOC
;
626 per_dev
->bio
= bio_kmalloc(gfp_flags
, bio_size
);
627 if (unlikely(!per_dev
->bio
)) {
628 dprintk("Faild to allocate BIO size=%u\n", bio_size
);
633 while (cur_len
> 0) {
634 unsigned pglen
= min_t(unsigned, PAGE_SIZE
- pgbase
, cur_len
);
637 BUG_ON(ios
->nr_pages
<= pg
);
640 added_len
= bio_add_pc_page(q
, per_dev
->bio
,
641 ios
->pages
[pg
], pglen
, pgbase
);
642 if (unlikely(pglen
!= added_len
))
649 per_dev
->length
+= len
;
654 static int _prepare_one_group(struct objio_state
*ios
, u64 length
,
655 struct _striping_info
*si
, unsigned *last_pg
,
658 unsigned stripe_unit
= ios
->layout
->stripe_unit
;
659 unsigned mirrors_p1
= ios
->layout
->mirrors_p1
;
660 unsigned devs_in_group
= ios
->layout
->group_width
* mirrors_p1
;
661 unsigned dev
= si
->dev
;
662 unsigned first_dev
= dev
- (dev
% devs_in_group
);
663 unsigned max_comp
= ios
->numdevs
? ios
->numdevs
- mirrors_p1
: 0;
664 unsigned cur_pg
= *last_pg
;
668 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[dev
- first_dev
];
669 unsigned cur_len
, page_off
= 0;
671 if (!per_dev
->length
) {
674 per_dev
->offset
= si
->obj_offset
+ stripe_unit
-
676 cur_len
= stripe_unit
;
677 } else if (dev
== si
->dev
) {
678 per_dev
->offset
= si
->obj_offset
;
679 cur_len
= stripe_unit
- si
->unit_off
;
680 page_off
= si
->unit_off
& ~PAGE_MASK
;
682 (page_off
!= ios
->pgbase
));
683 } else { /* dev > si->dev */
684 per_dev
->offset
= si
->obj_offset
- si
->unit_off
;
685 cur_len
= stripe_unit
;
688 if (max_comp
< dev
- first_dev
)
689 max_comp
= dev
- first_dev
;
691 cur_len
= stripe_unit
;
693 if (cur_len
>= length
)
696 ret
= _add_stripe_unit(ios
, &cur_pg
, page_off
, per_dev
,
702 dev
= (dev
% devs_in_group
) + first_dev
;
705 ios
->length
+= cur_len
;
708 ios
->numdevs
= max_comp
+ mirrors_p1
;
713 static int _io_rw_pagelist(struct objio_state
*ios
, gfp_t gfp_flags
)
715 u64 length
= ios
->count
;
716 u64 offset
= ios
->offset
;
717 struct _striping_info si
;
718 unsigned last_pg
= 0;
722 _calc_stripe_info(ios
, offset
, &si
);
724 if (length
< si
.group_length
)
725 si
.group_length
= length
;
727 ret
= _prepare_one_group(ios
, si
.group_length
, &si
, &last_pg
, gfp_flags
);
731 offset
+= si
.group_length
;
732 length
-= si
.group_length
;
742 static int _sync_done(struct objio_state
*ios
)
744 struct completion
*waiting
= ios
->private;
750 static void _last_io(struct kref
*kref
)
752 struct objio_state
*ios
= container_of(kref
, struct objio_state
, kref
);
757 static void _done_io(struct osd_request
*or, void *p
)
759 struct objio_state
*ios
= p
;
761 kref_put(&ios
->kref
, _last_io
);
764 static int _io_exec(struct objio_state
*ios
)
766 DECLARE_COMPLETION_ONSTACK(wait
);
769 objio_done_fn saved_done_fn
= ios
->done
;
770 bool sync
= ios
->sync
;
773 ios
->done
= _sync_done
;
774 ios
->private = &wait
;
777 kref_init(&ios
->kref
);
779 for (i
= 0; i
< ios
->numdevs
; i
++) {
780 struct osd_request
*or = ios
->per_dev
[i
].or;
785 kref_get(&ios
->kref
);
786 osd_execute_request_async(or, _done_io
, ios
);
789 kref_put(&ios
->kref
, _last_io
);
792 wait_for_completion(&wait
);
793 ret
= saved_done_fn(ios
);
802 static int _read_done(struct objio_state
*ios
)
805 int ret
= _io_check(ios
, false);
810 status
= ios
->length
;
814 objlayout_read_done(&ios
->oir
, status
, ios
->sync
);
818 static int _read_mirrors(struct objio_state
*ios
, unsigned cur_comp
)
820 struct osd_request
*or = NULL
;
821 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[cur_comp
];
822 unsigned dev
= per_dev
->dev
;
823 struct pnfs_osd_object_cred
*cred
=
824 &ios
->layout
->comps
[cur_comp
];
825 struct osd_obj_id obj
= {
826 .partition
= cred
->oc_object_id
.oid_partition_id
,
827 .id
= cred
->oc_object_id
.oid_object_id
,
831 or = osd_start_request(_io_od(ios
, dev
), GFP_KERNEL
);
838 osd_req_read(or, &obj
, per_dev
->offset
, per_dev
->bio
, per_dev
->length
);
840 ret
= osd_finalize_request(or, 0, cred
->oc_cap
.cred
, NULL
);
842 dprintk("%s: Faild to osd_finalize_request() => %d\n",
847 dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
848 __func__
, cur_comp
, dev
, obj
.id
, _LLU(per_dev
->offset
),
855 static int _read_exec(struct objio_state
*ios
)
860 for (i
= 0; i
< ios
->numdevs
; i
+= ios
->layout
->mirrors_p1
) {
861 if (!ios
->per_dev
[i
].length
)
863 ret
= _read_mirrors(ios
, i
);
868 ios
->done
= _read_done
;
869 return _io_exec(ios
);
876 int objio_read_pagelist(struct nfs_read_data
*rdata
)
878 struct objio_state
*ios
;
881 ret
= objio_alloc_io_state(NFS_I(rdata
->inode
)->layout
,
882 rdata
->lseg
, rdata
->args
.pages
, rdata
->args
.pgbase
,
883 rdata
->args
.offset
, rdata
->args
.count
, rdata
,
888 ret
= _io_rw_pagelist(ios
, GFP_KERNEL
);
892 return _read_exec(ios
);
898 static int _write_done(struct objio_state
*ios
)
901 int ret
= _io_check(ios
, true);
906 /* FIXME: should be based on the OSD's persistence model
907 * See OSD2r05 Section 4.13 Data persistence model */
908 ios
->oir
.committed
= NFS_FILE_SYNC
;
909 status
= ios
->length
;
914 objlayout_write_done(&ios
->oir
, status
, ios
->sync
);
918 static int _write_mirrors(struct objio_state
*ios
, unsigned cur_comp
)
920 struct _objio_per_comp
*master_dev
= &ios
->per_dev
[cur_comp
];
921 unsigned dev
= ios
->per_dev
[cur_comp
].dev
;
922 unsigned last_comp
= cur_comp
+ ios
->layout
->mirrors_p1
;
925 for (; cur_comp
< last_comp
; ++cur_comp
, ++dev
) {
926 struct osd_request
*or = NULL
;
927 struct pnfs_osd_object_cred
*cred
=
928 &ios
->layout
->comps
[cur_comp
];
929 struct osd_obj_id obj
= {
930 .partition
= cred
->oc_object_id
.oid_partition_id
,
931 .id
= cred
->oc_object_id
.oid_object_id
,
933 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[cur_comp
];
936 or = osd_start_request(_io_od(ios
, dev
), GFP_NOFS
);
943 if (per_dev
!= master_dev
) {
944 bio
= bio_kmalloc(GFP_NOFS
,
945 master_dev
->bio
->bi_max_vecs
);
946 if (unlikely(!bio
)) {
947 dprintk("Faild to allocate BIO size=%u\n",
948 master_dev
->bio
->bi_max_vecs
);
953 __bio_clone(bio
, master_dev
->bio
);
958 per_dev
->length
= master_dev
->length
;
959 per_dev
->offset
= master_dev
->offset
;
961 bio
= master_dev
->bio
;
962 bio
->bi_rw
|= REQ_WRITE
;
965 osd_req_write(or, &obj
, per_dev
->offset
, bio
, per_dev
->length
);
967 ret
= osd_finalize_request(or, 0, cred
->oc_cap
.cred
, NULL
);
969 dprintk("%s: Faild to osd_finalize_request() => %d\n",
974 dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
975 __func__
, cur_comp
, dev
, obj
.id
, _LLU(per_dev
->offset
),
983 static int _write_exec(struct objio_state
*ios
)
988 for (i
= 0; i
< ios
->numdevs
; i
+= ios
->layout
->mirrors_p1
) {
989 if (!ios
->per_dev
[i
].length
)
991 ret
= _write_mirrors(ios
, i
);
996 ios
->done
= _write_done
;
997 return _io_exec(ios
);
1004 int objio_write_pagelist(struct nfs_write_data
*wdata
, int how
)
1006 struct objio_state
*ios
;
1009 ret
= objio_alloc_io_state(NFS_I(wdata
->inode
)->layout
,
1010 wdata
->lseg
, wdata
->args
.pages
, wdata
->args
.pgbase
,
1011 wdata
->args
.offset
, wdata
->args
.count
, wdata
, GFP_NOFS
,
1016 ios
->sync
= 0 != (how
& FLUSH_SYNC
);
1018 /* TODO: ios->stable = stable; */
1019 ret
= _io_rw_pagelist(ios
, GFP_NOFS
);
1023 return _write_exec(ios
);
1026 static bool objio_pg_test(struct nfs_pageio_descriptor
*pgio
,
1027 struct nfs_page
*prev
, struct nfs_page
*req
)
1029 if (!pnfs_generic_pg_test(pgio
, prev
, req
))
1032 return pgio
->pg_count
+ req
->wb_bytes
<=
1033 OBJIO_LSEG(pgio
->pg_lseg
)->max_io_size
;
1036 static const struct nfs_pageio_ops objio_pg_read_ops
= {
1037 .pg_init
= pnfs_generic_pg_init_read
,
1038 .pg_test
= objio_pg_test
,
1039 .pg_doio
= pnfs_generic_pg_readpages
,
1042 static const struct nfs_pageio_ops objio_pg_write_ops
= {
1043 .pg_init
= pnfs_generic_pg_init_write
,
1044 .pg_test
= objio_pg_test
,
1045 .pg_doio
= pnfs_generic_pg_writepages
,
1048 static struct pnfs_layoutdriver_type objlayout_type
= {
1049 .id
= LAYOUT_OSD2_OBJECTS
,
1050 .name
= "LAYOUT_OSD2_OBJECTS",
1051 .flags
= PNFS_LAYOUTRET_ON_SETATTR
,
1053 .alloc_layout_hdr
= objlayout_alloc_layout_hdr
,
1054 .free_layout_hdr
= objlayout_free_layout_hdr
,
1056 .alloc_lseg
= objlayout_alloc_lseg
,
1057 .free_lseg
= objlayout_free_lseg
,
1059 .read_pagelist
= objlayout_read_pagelist
,
1060 .write_pagelist
= objlayout_write_pagelist
,
1061 .pg_read_ops
= &objio_pg_read_ops
,
1062 .pg_write_ops
= &objio_pg_write_ops
,
1064 .free_deviceid_node
= objio_free_deviceid_node
,
1066 .encode_layoutcommit
= objlayout_encode_layoutcommit
,
1067 .encode_layoutreturn
= objlayout_encode_layoutreturn
,
1070 MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
1071 MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
1072 MODULE_LICENSE("GPL");
1075 objlayout_init(void)
1077 int ret
= pnfs_register_layoutdriver(&objlayout_type
);
1081 "%s: Registering OSD pNFS Layout Driver failed: error=%d\n",
1084 printk(KERN_INFO
"%s: Registered OSD pNFS Layout Driver\n",
1090 objlayout_exit(void)
1092 pnfs_unregister_layoutdriver(&objlayout_type
);
1093 printk(KERN_INFO
"%s: Unregistered OSD pNFS Layout Driver\n",
1097 MODULE_ALIAS("nfs-layouttype4-2");
1099 module_init(objlayout_init
);
1100 module_exit(objlayout_exit
);