/*
 *  Device operations for the pnfs nfs4 file layout driver.
 *
 *  Copyright (c) 2002
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *  Garth Goodson   <Garth.Goodson@netapp.com>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */
31 #include <linux/nfs_fs.h>
32 #include <linux/vmalloc.h>
35 #include "nfs4filelayout.h"
37 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
 * Data server cache
 *
 * Data servers can be mapped to different device ids.
 * nfs4_pnfs_ds reference counting
 *   - set to 1 on allocation
 *   - incremented when a device id maps a data server already in the cache.
 *   - decremented when deviceid is removed from the cache.
 *
 * nfs4_ds_cache_lock is taken around every lookup, insertion and removal
 * performed on nfs4_data_server_cache in this file.
 */
DEFINE_SPINLOCK(nfs4_ds_cache_lock);
static LIST_HEAD(nfs4_data_server_cache);
53 print_ds(struct nfs4_pnfs_ds
*ds
)
56 printk("%s NULL device\n", __func__
);
59 printk(" ip_addr %x port %hu\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds
->ds_ip_addr
), ntohs(ds
->ds_port
),
64 atomic_read(&ds
->ds_count
), ds
->ds_clp
,
65 ds
->ds_clp
? ds
->ds_clp
->cl_exchange_flags
: 0);
69 print_ds_list(struct nfs4_file_layout_dsaddr
*dsaddr
)
74 printk("%s dsaddr->ds_num %d\n", __func__
,
76 for (i
= 0; i
< dsaddr
->ds_num
; i
++)
77 print_ds(dsaddr
->ds_list
[i
]);
81 void print_deviceid(struct nfs4_deviceid
*id
)
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__
,
86 p
[0], p
[1], p
[2], p
[3]);
89 /* nfs4_ds_cache_lock is held */
90 static struct nfs4_pnfs_ds
*
91 _data_server_lookup_locked(u32 ip_addr
, u32 port
)
93 struct nfs4_pnfs_ds
*ds
;
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr
), ntohs(port
));
98 list_for_each_entry(ds
, &nfs4_data_server_cache
, ds_node
) {
99 if (ds
->ds_ip_addr
== ip_addr
&&
100 ds
->ds_port
== port
) {
108 destroy_ds(struct nfs4_pnfs_ds
*ds
)
110 dprintk("--> %s\n", __func__
);
115 nfs_put_client(ds
->ds_clp
);
120 nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr
*dsaddr
)
122 struct nfs4_pnfs_ds
*ds
;
125 print_deviceid(&dsaddr
->deviceid
.de_id
);
127 for (i
= 0; i
< dsaddr
->ds_num
; i
++) {
128 ds
= dsaddr
->ds_list
[i
];
130 if (atomic_dec_and_lock(&ds
->ds_count
,
131 &nfs4_ds_cache_lock
)) {
132 list_del_init(&ds
->ds_node
);
133 spin_unlock(&nfs4_ds_cache_lock
);
138 kfree(dsaddr
->stripe_indices
);
143 nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node
*device
)
145 struct nfs4_file_layout_dsaddr
*dsaddr
=
146 container_of(device
, struct nfs4_file_layout_dsaddr
, deviceid
);
148 nfs4_fl_free_deviceid(dsaddr
);
151 static struct nfs4_pnfs_ds
*
152 nfs4_pnfs_ds_add(struct inode
*inode
, u32 ip_addr
, u32 port
)
154 struct nfs4_pnfs_ds
*tmp_ds
, *ds
;
156 ds
= kzalloc(sizeof(*tmp_ds
), GFP_KERNEL
);
160 spin_lock(&nfs4_ds_cache_lock
);
161 tmp_ds
= _data_server_lookup_locked(ip_addr
, port
);
162 if (tmp_ds
== NULL
) {
163 ds
->ds_ip_addr
= ip_addr
;
165 atomic_set(&ds
->ds_count
, 1);
166 INIT_LIST_HEAD(&ds
->ds_node
);
168 list_add(&ds
->ds_node
, &nfs4_data_server_cache
);
169 dprintk("%s add new data server ip 0x%x\n", __func__
,
173 atomic_inc(&tmp_ds
->ds_count
);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__
, tmp_ds
->ds_ip_addr
,
176 atomic_read(&tmp_ds
->ds_count
));
179 spin_unlock(&nfs4_ds_cache_lock
);
185 * Currently only support ipv4, and one multi-path address.
187 static struct nfs4_pnfs_ds
*
188 decode_and_add_ds(__be32
**pp
, struct inode
*inode
)
190 struct nfs4_pnfs_ds
*ds
= NULL
;
192 const char *ipend
, *pstr
;
196 __be32
*r_netid
, *r_addr
, *p
= *pp
;
199 nlen
= be32_to_cpup(p
++);
201 p
+= XDR_QUADLEN(nlen
);
204 rlen
= be32_to_cpup(p
++);
206 p
+= XDR_QUADLEN(rlen
);
209 /* Check that netid is "tcp" */
210 if (nlen
!= 3 || memcmp((char *)r_netid
, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__
);
215 /* ipv6 length plus port is legal */
216 if (rlen
> INET6_ADDRSTRLEN
+ 8) {
217 dprintk("%s: Invalid address, length %d\n", __func__
,
221 buf
= kmalloc(rlen
+ 1, GFP_KERNEL
);
223 memcpy(buf
, r_addr
, rlen
);
225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i
= 0; i
< 2; i
++) {
227 char *res
= strrchr(buf
, '.');
229 dprintk("%s: Failed finding expected dots in port\n",
236 /* Currently only support ipv4 address */
237 if (in4_pton(buf
, rlen
, (u8
*)&ip_addr
, '-', &ipend
) == 0) {
238 dprintk("%s: Only ipv4 addresses supported\n", __func__
);
244 sscanf(pstr
, "-%d-%d", &tmp
[0], &tmp
[1]);
245 port
= htons((tmp
[0] << 8) | (tmp
[1]));
247 ds
= nfs4_pnfs_ds_add(inode
, ip_addr
, port
);
248 dprintk("%s: Decoded address and port %s\n", __func__
, buf
);
255 /* Decode opaque device data and return the result */
256 static struct nfs4_file_layout_dsaddr
*
257 decode_device(struct inode
*ino
, struct pnfs_device
*pdev
)
262 __be32
*p
= (__be32
*)pdev
->area
, *indicesp
;
263 struct nfs4_file_layout_dsaddr
*dsaddr
;
265 /* Get the stripe count (number of stripe index) */
266 cnt
= be32_to_cpup(p
++);
267 dprintk("%s stripe count %d\n", __func__
, cnt
);
268 if (cnt
> NFS4_PNFS_MAX_STRIPE_CNT
) {
269 printk(KERN_WARNING
"%s: stripe count %d greater than "
270 "supported maximum %d\n", __func__
,
271 cnt
, NFS4_PNFS_MAX_STRIPE_CNT
);
275 /* Check the multipath list count */
277 p
+= XDR_QUADLEN(cnt
<< 2);
278 num
= be32_to_cpup(p
++);
279 dprintk("%s ds_num %u\n", __func__
, num
);
280 if (num
> NFS4_PNFS_MAX_MULTI_CNT
) {
281 printk(KERN_WARNING
"%s: multipath count %d greater than "
282 "supported maximum %d\n", __func__
,
283 num
, NFS4_PNFS_MAX_MULTI_CNT
);
286 dsaddr
= kzalloc(sizeof(*dsaddr
) +
287 (sizeof(struct nfs4_pnfs_ds
*) * (num
- 1)),
292 dsaddr
->stripe_indices
= kzalloc(sizeof(u8
) * cnt
, GFP_KERNEL
);
293 if (!dsaddr
->stripe_indices
)
296 dsaddr
->stripe_count
= cnt
;
297 dsaddr
->ds_num
= num
;
299 memcpy(&dsaddr
->deviceid
.de_id
, &pdev
->dev_id
, sizeof(pdev
->dev_id
));
301 /* Go back an read stripe indices */
303 indexp
= &dsaddr
->stripe_indices
[0];
304 for (i
= 0; i
< dsaddr
->stripe_count
; i
++) {
305 *indexp
= be32_to_cpup(p
++);
310 /* Skip already read multipath list count */
313 for (i
= 0; i
< dsaddr
->ds_num
; i
++) {
316 dummy
= be32_to_cpup(p
++); /* multipath count */
319 "%s: Multipath count %d not supported, "
320 "skipping all greater than 1\n", __func__
,
323 for (j
= 0; j
< dummy
; j
++) {
325 dsaddr
->ds_list
[i
] = decode_and_add_ds(&p
, ino
);
326 if (dsaddr
->ds_list
[i
] == NULL
)
330 /* skip extra multipath */
331 len
= be32_to_cpup(p
++);
332 p
+= XDR_QUADLEN(len
);
333 len
= be32_to_cpup(p
++);
334 p
+= XDR_QUADLEN(len
);
342 nfs4_fl_free_deviceid(dsaddr
);
344 dprintk("%s ERROR: returning NULL\n", __func__
);
349 * Decode the opaque device specified in 'dev'
350 * and add it to the list of available devices.
351 * If the deviceid is already cached, nfs4_add_deviceid will return
352 * a pointer to the cached struct and throw away the new.
354 static struct nfs4_file_layout_dsaddr
*
355 decode_and_add_device(struct inode
*inode
, struct pnfs_device
*dev
)
357 struct nfs4_file_layout_dsaddr
*dsaddr
;
358 struct pnfs_deviceid_node
*d
;
360 dsaddr
= decode_device(inode
, dev
);
362 printk(KERN_WARNING
"%s: Could not decode or add device\n",
367 d
= pnfs_add_deviceid(NFS_SERVER(inode
)->nfs_client
->cl_devid_cache
,
370 return container_of(d
, struct nfs4_file_layout_dsaddr
, deviceid
);
374 * Retrieve the information for dev_id, add it to the list
375 * of available devices, and return it.
377 struct nfs4_file_layout_dsaddr
*
378 get_device_info(struct inode
*inode
, struct nfs4_deviceid
*dev_id
)
380 struct pnfs_device
*pdev
= NULL
;
383 struct page
**pages
= NULL
;
384 struct nfs4_file_layout_dsaddr
*dsaddr
= NULL
;
386 struct nfs_server
*server
= NFS_SERVER(inode
);
389 * Use the session max response size as the basis for setting
390 * GETDEVICEINFO's maxcount
392 max_resp_sz
= server
->nfs_client
->cl_session
->fc_attrs
.max_resp_sz
;
393 max_pages
= max_resp_sz
>> PAGE_SHIFT
;
394 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
395 __func__
, inode
, max_resp_sz
, max_pages
);
397 pdev
= kzalloc(sizeof(struct pnfs_device
), GFP_KERNEL
);
401 pages
= kzalloc(max_pages
* sizeof(struct page
*), GFP_KERNEL
);
406 for (i
= 0; i
< max_pages
; i
++) {
407 pages
[i
] = alloc_page(GFP_KERNEL
);
413 pdev
->area
= vmap(pages
, max_pages
, VM_MAP
, PAGE_KERNEL
);
417 memcpy(&pdev
->dev_id
, dev_id
, sizeof(*dev_id
));
418 pdev
->layout_type
= LAYOUT_NFSV4_1_FILES
;
421 pdev
->pglen
= PAGE_SIZE
* max_pages
;
424 rc
= nfs4_proc_getdeviceinfo(server
, pdev
);
425 dprintk("%s getdevice info returns %d\n", __func__
, rc
);
430 * Found new device, need to decode it and then add it to the
431 * list of known devices for this mountpoint.
433 dsaddr
= decode_and_add_device(inode
, pdev
);
435 if (pdev
->area
!= NULL
)
437 for (i
= 0; i
< max_pages
; i
++)
438 __free_page(pages
[i
]);
441 dprintk("<-- %s dsaddr %p\n", __func__
, dsaddr
);
445 struct nfs4_file_layout_dsaddr
*
446 nfs4_fl_find_get_deviceid(struct nfs_client
*clp
, struct nfs4_deviceid
*id
)
448 struct pnfs_deviceid_node
*d
;
450 d
= pnfs_find_get_deviceid(clp
->cl_devid_cache
, id
);
451 return (d
== NULL
) ? NULL
:
452 container_of(d
, struct nfs4_file_layout_dsaddr
, deviceid
);