[PATCH] knfsd: call lockd_down when closing a socket via a write to nfsd/portlist
[linux-2.6/kvm.git] / drivers / block / aoe / aoecmd.c
blob39da28d344fe9465186f6bed89be04ff449a7f77
1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
2 /*
3 * aoecmd.c
4 * Filesystem request handling methods
5 */
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include <linux/genhd.h>
12 #include <asm/unaligned.h>
13 #include "aoe.h"
/*
 * Retransmit timing, in jiffies.  The retransmit timer is re-armed every
 * TIMERTICK; round-trip samples and the doubled average are bounded by
 * MINTIMER and MAXTIMER.  MAXWAIT, unlike the others, is in seconds.
 */
15 #define TIMERTICK (HZ / 10)
16 #define MINTIMER (2 * TIMERTICK)
17 #define MAXTIMER (HZ << 1)
18 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
20 static struct sk_buff *
21 new_skb(struct net_device *if_dev, ulong len)
23 struct sk_buff *skb;
25 skb = alloc_skb(len, GFP_ATOMIC);
26 if (skb) {
27 skb->nh.raw = skb->mac.raw = skb->data;
28 skb->dev = if_dev;
29 skb->protocol = __constant_htons(ETH_P_AOE);
30 skb->priority = 0;
31 skb_put(skb, len);
32 memset(skb->head, 0, len);
33 skb->next = skb->prev = NULL;
35 /* tell the network layer not to perform IP checksums
36 * or to get the NIC to do it
38 skb->ip_summed = CHECKSUM_NONE;
40 return skb;
43 static struct sk_buff *
44 skb_prepare(struct aoedev *d, struct frame *f)
46 struct sk_buff *skb;
47 char *p;
49 skb = new_skb(d->ifp, f->ndata + f->writedatalen);
50 if (!skb) {
51 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
52 return NULL;
55 p = skb->mac.raw;
56 memcpy(p, f->data, f->ndata);
58 if (f->writedatalen) {
59 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
60 memcpy(p, f->bufaddr, f->writedatalen);
63 return skb;
66 static struct frame *
67 getframe(struct aoedev *d, int tag)
69 struct frame *f, *e;
71 f = d->frames;
72 e = f + d->nframes;
73 for (; f<e; f++)
74 if (f->tag == tag)
75 return f;
76 return NULL;
80 * Leave the top bit clear so we have tagspace for userland.
81 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
82 * This driver reserves tag -1 to mean "unused frame."
84 static int
85 newtag(struct aoedev *d)
87 register ulong n;
89 n = jiffies & 0xffff;
90 return n |= (++d->lasttag & 0x7fff) << 16;
93 static int
94 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
96 u32 host_tag = newtag(d);
98 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
99 memcpy(h->dst, d->addr, sizeof h->dst);
100 h->type = __constant_cpu_to_be16(ETH_P_AOE);
101 h->verfl = AOE_HVER;
102 h->major = cpu_to_be16(d->aoemajor);
103 h->minor = d->aoeminor;
104 h->cmd = AOECMD_ATA;
105 h->tag = cpu_to_be32(host_tag);
107 return host_tag;
110 static void
111 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
113 struct aoe_hdr *h;
114 struct aoe_atahdr *ah;
115 struct buf *buf;
116 struct sk_buff *skb;
117 ulong bcnt;
118 register sector_t sector;
119 char writebit, extbit;
121 writebit = 0x10;
122 extbit = 0x4;
124 buf = d->inprocess;
126 sector = buf->sector;
127 bcnt = buf->bv_resid;
128 if (bcnt > MAXATADATA)
129 bcnt = MAXATADATA;
131 /* initialize the headers & frame */
132 h = (struct aoe_hdr *) f->data;
133 ah = (struct aoe_atahdr *) (h+1);
134 f->ndata = sizeof *h + sizeof *ah;
135 memset(h, 0, f->ndata);
136 f->tag = aoehdr_atainit(d, h);
137 f->waited = 0;
138 f->buf = buf;
139 f->bufaddr = buf->bufaddr;
141 /* set up ata header */
142 ah->scnt = bcnt >> 9;
143 ah->lba0 = sector;
144 ah->lba1 = sector >>= 8;
145 ah->lba2 = sector >>= 8;
146 ah->lba3 = sector >>= 8;
147 if (d->flags & DEVFL_EXT) {
148 ah->aflags |= AOEAFL_EXT;
149 ah->lba4 = sector >>= 8;
150 ah->lba5 = sector >>= 8;
151 } else {
152 extbit = 0;
153 ah->lba3 &= 0x0f;
154 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
157 if (bio_data_dir(buf->bio) == WRITE) {
158 ah->aflags |= AOEAFL_WRITE;
159 f->writedatalen = bcnt;
160 } else {
161 writebit = 0;
162 f->writedatalen = 0;
165 ah->cmdstat = WIN_READ | writebit | extbit;
167 /* mark all tracking fields and load out */
168 buf->nframesout += 1;
169 buf->bufaddr += bcnt;
170 buf->bv_resid -= bcnt;
171 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
172 buf->resid -= bcnt;
173 buf->sector += bcnt >> 9;
174 if (buf->resid == 0) {
175 d->inprocess = NULL;
176 } else if (buf->bv_resid == 0) {
177 buf->bv++;
178 buf->bv_resid = buf->bv->bv_len;
179 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
182 skb = skb_prepare(d, f);
183 if (skb) {
184 skb->next = NULL;
185 if (d->sendq_hd)
186 d->sendq_tl->next = skb;
187 else
188 d->sendq_hd = skb;
189 d->sendq_tl = skb;
193 /* some callers cannot sleep, and they can call this function,
194 * transmitting the packets later, when interrupts are on
196 static struct sk_buff *
197 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
199 struct aoe_hdr *h;
200 struct aoe_cfghdr *ch;
201 struct sk_buff *skb, *sl, *sl_tail;
202 struct net_device *ifp;
204 sl = sl_tail = NULL;
206 read_lock(&dev_base_lock);
207 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
208 dev_hold(ifp);
209 if (!is_aoe_netif(ifp))
210 continue;
212 skb = new_skb(ifp, sizeof *h + sizeof *ch);
213 if (skb == NULL) {
214 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
215 continue;
217 if (sl_tail == NULL)
218 sl_tail = skb;
219 h = (struct aoe_hdr *) skb->mac.raw;
220 memset(h, 0, sizeof *h + sizeof *ch);
222 memset(h->dst, 0xff, sizeof h->dst);
223 memcpy(h->src, ifp->dev_addr, sizeof h->src);
224 h->type = __constant_cpu_to_be16(ETH_P_AOE);
225 h->verfl = AOE_HVER;
226 h->major = cpu_to_be16(aoemajor);
227 h->minor = aoeminor;
228 h->cmd = AOECMD_CFG;
230 skb->next = sl;
231 sl = skb;
233 read_unlock(&dev_base_lock);
235 if (tail != NULL)
236 *tail = sl_tail;
237 return sl;
240 /* enters with d->lock held */
241 void
242 aoecmd_work(struct aoedev *d)
244 struct frame *f;
245 struct buf *buf;
247 if (d->flags & DEVFL_PAUSE) {
248 if (!aoedev_isbusy(d))
249 d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
250 d->aoeminor, &d->sendq_tl);
251 return;
254 loop:
255 f = getframe(d, FREETAG);
256 if (f == NULL)
257 return;
258 if (d->inprocess == NULL) {
259 if (list_empty(&d->bufq))
260 return;
261 buf = container_of(d->bufq.next, struct buf, bufs);
262 list_del(d->bufq.next);
263 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
264 d->inprocess = buf;
266 aoecmd_ata_rw(d, f);
267 goto loop;
270 static void
271 rexmit(struct aoedev *d, struct frame *f)
273 struct sk_buff *skb;
274 struct aoe_hdr *h;
275 char buf[128];
276 u32 n;
278 n = newtag(d);
280 snprintf(buf, sizeof buf,
281 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
282 "retransmit",
283 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
284 aoechr_error(buf);
286 h = (struct aoe_hdr *) f->data;
287 f->tag = n;
288 h->tag = cpu_to_be32(n);
289 memcpy(h->dst, d->addr, sizeof h->dst);
290 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
292 skb = skb_prepare(d, f);
293 if (skb) {
294 skb->next = NULL;
295 if (d->sendq_hd)
296 d->sendq_tl->next = skb;
297 else
298 d->sendq_hd = skb;
299 d->sendq_tl = skb;
303 static int
304 tsince(int tag)
306 int n;
308 n = jiffies & 0xffff;
309 n -= tag & 0xffff;
310 if (n < 0)
311 n += 1<<16;
312 return n;
315 static void
316 rexmit_timer(ulong vp)
318 struct aoedev *d;
319 struct frame *f, *e;
320 struct sk_buff *sl;
321 register long timeout;
322 ulong flags, n;
324 d = (struct aoedev *) vp;
325 sl = NULL;
327 /* timeout is always ~150% of the moving average */
328 timeout = d->rttavg;
329 timeout += timeout >> 1;
331 spin_lock_irqsave(&d->lock, flags);
333 if (d->flags & DEVFL_TKILL) {
334 spin_unlock_irqrestore(&d->lock, flags);
335 return;
337 f = d->frames;
338 e = f + d->nframes;
339 for (; f<e; f++) {
340 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
341 n = f->waited += timeout;
342 n /= HZ;
343 if (n > MAXWAIT) { /* waited too long. device failure. */
344 aoedev_downdev(d);
345 break;
347 rexmit(d, f);
351 sl = d->sendq_hd;
352 d->sendq_hd = d->sendq_tl = NULL;
353 if (sl) {
354 n = d->rttavg <<= 1;
355 if (n > MAXTIMER)
356 d->rttavg = MAXTIMER;
359 d->timer.expires = jiffies + TIMERTICK;
360 add_timer(&d->timer);
362 spin_unlock_irqrestore(&d->lock, flags);
364 aoenet_xmit(sl);
367 /* this function performs work that has been deferred until sleeping is OK
369 void
370 aoecmd_sleepwork(void *vp)
372 struct aoedev *d = (struct aoedev *) vp;
374 if (d->flags & DEVFL_GDALLOC)
375 aoeblk_gdalloc(d);
377 if (d->flags & DEVFL_NEWSIZE) {
378 struct block_device *bd;
379 unsigned long flags;
380 u64 ssize;
382 ssize = d->gd->capacity;
383 bd = bdget_disk(d->gd, 0);
385 if (bd) {
386 mutex_lock(&bd->bd_inode->i_mutex);
387 i_size_write(bd->bd_inode, (loff_t)ssize<<9);
388 mutex_unlock(&bd->bd_inode->i_mutex);
389 bdput(bd);
391 spin_lock_irqsave(&d->lock, flags);
392 d->flags |= DEVFL_UP;
393 d->flags &= ~DEVFL_NEWSIZE;
394 spin_unlock_irqrestore(&d->lock, flags);
398 static void
399 ataid_complete(struct aoedev *d, unsigned char *id)
401 u64 ssize;
402 u16 n;
404 /* word 83: command set supported */
405 n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
407 /* word 86: command set/feature enabled */
408 n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
410 if (n & (1<<10)) { /* bit 10: LBA 48 */
411 d->flags |= DEVFL_EXT;
413 /* word 100: number lba48 sectors */
414 ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
416 /* set as in ide-disk.c:init_idedisk_capacity */
417 d->geo.cylinders = ssize;
418 d->geo.cylinders /= (255 * 63);
419 d->geo.heads = 255;
420 d->geo.sectors = 63;
421 } else {
422 d->flags &= ~DEVFL_EXT;
424 /* number lba28 sectors */
425 ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
427 /* NOTE: obsolete in ATA 6 */
428 d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
429 d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
430 d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
433 if (d->ssize != ssize)
434 printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu "
435 "sectors\n", (unsigned long long)mac_addr(d->addr),
436 d->aoemajor, d->aoeminor,
437 d->fw_ver, (long long)ssize);
438 d->ssize = ssize;
439 d->geo.start = 0;
440 if (d->gd != NULL) {
441 d->gd->capacity = ssize;
442 d->flags |= DEVFL_NEWSIZE;
443 } else {
444 if (d->flags & DEVFL_GDALLOC) {
445 printk(KERN_INFO "aoe: %s: %s e%lu.%lu, %s\n",
446 __FUNCTION__,
447 "can't schedule work for",
448 d->aoemajor, d->aoeminor,
449 "it's already on! (This really shouldn't happen).\n");
450 return;
452 d->flags |= DEVFL_GDALLOC;
454 schedule_work(&d->work);
457 static void
458 calc_rttavg(struct aoedev *d, int rtt)
460 register long n;
462 n = rtt;
463 if (n < MINTIMER)
464 n = MINTIMER;
465 else if (n > MAXTIMER)
466 n = MAXTIMER;
468 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
469 n -= d->rttavg;
470 d->rttavg += n >> 2;
473 void
474 aoecmd_ata_rsp(struct sk_buff *skb)
476 struct aoedev *d;
477 struct aoe_hdr *hin;
478 struct aoe_atahdr *ahin, *ahout;
479 struct frame *f;
480 struct buf *buf;
481 struct sk_buff *sl;
482 register long n;
483 ulong flags;
484 char ebuf[128];
485 u16 aoemajor;
487 hin = (struct aoe_hdr *) skb->mac.raw;
488 aoemajor = be16_to_cpu(hin->major);
489 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
490 if (d == NULL) {
491 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
492 "for unknown device %d.%d\n",
493 aoemajor, hin->minor);
494 aoechr_error(ebuf);
495 return;
498 spin_lock_irqsave(&d->lock, flags);
500 f = getframe(d, be32_to_cpu(hin->tag));
501 if (f == NULL) {
502 spin_unlock_irqrestore(&d->lock, flags);
503 snprintf(ebuf, sizeof ebuf,
504 "%15s e%d.%d tag=%08x@%08lx\n",
505 "unexpected rsp",
506 be16_to_cpu(hin->major),
507 hin->minor,
508 be32_to_cpu(hin->tag),
509 jiffies);
510 aoechr_error(ebuf);
511 return;
514 calc_rttavg(d, tsince(f->tag));
516 ahin = (struct aoe_atahdr *) (hin+1);
517 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
518 buf = f->buf;
520 if (ahout->cmdstat == WIN_IDENTIFY)
521 d->flags &= ~DEVFL_PAUSE;
522 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
523 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
524 "stat=%2.2Xh from e%ld.%ld\n",
525 ahout->cmdstat, ahin->cmdstat,
526 d->aoemajor, d->aoeminor);
527 if (buf)
528 buf->flags |= BUFFL_FAIL;
529 } else {
530 switch (ahout->cmdstat) {
531 case WIN_READ:
532 case WIN_READ_EXT:
533 n = ahout->scnt << 9;
534 if (skb->len - sizeof *hin - sizeof *ahin < n) {
535 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
536 "ata data size in read. skb->len=%d\n",
537 skb->len);
538 /* fail frame f? just returning will rexmit. */
539 spin_unlock_irqrestore(&d->lock, flags);
540 return;
542 memcpy(f->bufaddr, ahin+1, n);
543 case WIN_WRITE:
544 case WIN_WRITE_EXT:
545 break;
546 case WIN_IDENTIFY:
547 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
548 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
549 "in ataid. skb->len=%d\n", skb->len);
550 spin_unlock_irqrestore(&d->lock, flags);
551 return;
553 ataid_complete(d, (char *) (ahin+1));
554 break;
555 default:
556 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
557 "outbound ata command %2.2Xh for %d.%d\n",
558 ahout->cmdstat,
559 be16_to_cpu(hin->major),
560 hin->minor);
564 if (buf) {
565 buf->nframesout -= 1;
566 if (buf->nframesout == 0 && buf->resid == 0) {
567 unsigned long duration = jiffies - buf->start_time;
568 unsigned long n_sect = buf->bio->bi_size >> 9;
569 struct gendisk *disk = d->gd;
570 const int rw = bio_data_dir(buf->bio);
572 disk_stat_inc(disk, ios[rw]);
573 disk_stat_add(disk, ticks[rw], duration);
574 disk_stat_add(disk, sectors[rw], n_sect);
575 disk_stat_add(disk, io_ticks, duration);
576 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
577 bio_endio(buf->bio, buf->bio->bi_size, n);
578 mempool_free(buf, d->bufpool);
582 f->buf = NULL;
583 f->tag = FREETAG;
585 aoecmd_work(d);
586 sl = d->sendq_hd;
587 d->sendq_hd = d->sendq_tl = NULL;
589 spin_unlock_irqrestore(&d->lock, flags);
590 aoenet_xmit(sl);
593 void
594 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
596 struct sk_buff *sl;
598 sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);
600 aoenet_xmit(sl);
604 * Since we only call this in one place (and it only prepares one frame)
605 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
607 static struct sk_buff *
608 aoecmd_ata_id(struct aoedev *d)
610 struct aoe_hdr *h;
611 struct aoe_atahdr *ah;
612 struct frame *f;
613 struct sk_buff *skb;
615 f = getframe(d, FREETAG);
616 if (f == NULL) {
617 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
618 "This shouldn't happen.\n");
619 return NULL;
622 /* initialize the headers & frame */
623 h = (struct aoe_hdr *) f->data;
624 ah = (struct aoe_atahdr *) (h+1);
625 f->ndata = sizeof *h + sizeof *ah;
626 memset(h, 0, f->ndata);
627 f->tag = aoehdr_atainit(d, h);
628 f->waited = 0;
629 f->writedatalen = 0;
631 /* set up ata header */
632 ah->scnt = 1;
633 ah->cmdstat = WIN_IDENTIFY;
634 ah->lba3 = 0xa0;
636 skb = skb_prepare(d, f);
638 d->rttavg = MAXTIMER;
639 d->timer.function = rexmit_timer;
641 return skb;
644 void
645 aoecmd_cfg_rsp(struct sk_buff *skb)
647 struct aoedev *d;
648 struct aoe_hdr *h;
649 struct aoe_cfghdr *ch;
650 ulong flags, sysminor, aoemajor;
651 u16 bufcnt;
652 struct sk_buff *sl;
653 enum { MAXFRAMES = 16 };
655 h = (struct aoe_hdr *) skb->mac.raw;
656 ch = (struct aoe_cfghdr *) (h+1);
659 * Enough people have their dip switches set backwards to
660 * warrant a loud message for this special case.
662 aoemajor = be16_to_cpu(h->major);
663 if (aoemajor == 0xfff) {
664 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
665 "address is all ones. Check shelf dip switches\n");
666 return;
669 sysminor = SYSMINOR(aoemajor, h->minor);
670 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
671 printk(KERN_INFO
672 "aoe: e%ld.%d: minor number too large\n",
673 aoemajor, (int) h->minor);
674 return;
677 bufcnt = be16_to_cpu(ch->bufcnt);
678 if (bufcnt > MAXFRAMES) /* keep it reasonable */
679 bufcnt = MAXFRAMES;
681 d = aoedev_by_sysminor_m(sysminor, bufcnt);
682 if (d == NULL) {
683 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device sysminor_m failure\n");
684 return;
687 spin_lock_irqsave(&d->lock, flags);
689 /* permit device to migrate mac and network interface */
690 d->ifp = skb->dev;
691 memcpy(d->addr, h->src, sizeof d->addr);
693 /* don't change users' perspective */
694 if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
695 spin_unlock_irqrestore(&d->lock, flags);
696 return;
698 d->flags |= DEVFL_PAUSE; /* force pause */
699 d->fw_ver = be16_to_cpu(ch->fwver);
701 /* check for already outstanding ataid */
702 sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
704 spin_unlock_irqrestore(&d->lock, flags);
706 aoenet_xmit(sl);