Linux-2.6.12-rc2
[deliverable/linux.git] drivers/block/aoe/aoecmd.c
/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include "aoe.h"

#define TIMERTICK (HZ / 10)
#define MINTIMER (2 * TIMERTICK)
#define MAXTIMER (HZ << 1)
#define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */

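/*
 * Allocate and initialize an sk_buff of the given length for an AoE
 * frame.  AoE rides directly on Ethernet, so the network and MAC
 * header pointers both start at the beginning of the frame data.
 */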
static struct sk_buff *
new_skb(struct net_device *if_dev, ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
		skb->nh.raw = skb->mac.raw = skb->data;
		skb->dev = if_dev;
		skb->protocol = __constant_htons(ETH_P_AOE);
		skb->priority = 0;
		skb_put(skb, len);
		skb->next = skb->prev = NULL;

		/* tell the network layer not to perform IP checksums
		 * or to get the NIC to do it
		 */
		skb->ip_summed = CHECKSUM_NONE;
	}
	return skb;
}

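/*
 * Copy the frame's prebuilt AoE and ATA headers into a freshly
 * allocated skb, appending the write payload when the frame carries
 * one.  Returns NULL if the skb allocation fails.
 */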
static struct sk_buff *
skb_prepare(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	char *p;

	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
	if (!skb) {
		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
		return NULL;
	}

	p = skb->mac.raw;
	memcpy(p, f->data, f->ndata);

	if (f->writedatalen) {
		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
		memcpy(p, f->bufaddr, f->writedatalen);
	}

	return skb;
}

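/*
 * Linear search of the device's frame array for the frame carrying
 * the given tag.  Pass FREETAG to find an unused frame.
 */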
static struct frame *
getframe(struct aoedev *d, int tag)
{
	struct frame *f, *e;

	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoedev *d)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n |= (++d->lasttag & 0x7fff) << 16;
}

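/*
 * Fill in the common AoE header for an ATA command addressed to this
 * device and stamp it with a fresh tag.  The tag is returned in host
 * byte order for the caller's bookkeeping; the on-wire copy is
 * big-endian.
 */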
static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
	u16 type = __constant_cpu_to_be16(ETH_P_AOE);
	u16 aoemajor = __cpu_to_be16(d->aoemajor);
	u32 host_tag = newtag(d);
	u32 tag = __cpu_to_be32(host_tag);

	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
	memcpy(h->dst, d->addr, sizeof h->dst);
	memcpy(h->type, &type, sizeof type);
	h->verfl = AOE_HVER;
	memcpy(h->major, &aoemajor, sizeof aoemajor);
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
	memcpy(h->tag, &tag, sizeof tag);

	return host_tag;
}

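/*
 * Build one ATA read or write frame for the buffer currently in
 * process, covering at most MAXATADATA bytes, then advance the
 * buffer's position bookkeeping and queue the resulting skb on
 * d->skblist for transmission.
 */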
static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;
	extbit = 0x4;

	buf = d->inprocess;

	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > MAXATADATA)
		bcnt = MAXATADATA;

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = buf->bufaddr;

	/* set up ata header */
	ah->scnt = bcnt >> 9;
	ah->lba0 = sector;
	ah->lba1 = sector >>= 8;
	ah->lba2 = sector >>= 8;
	ah->lba3 = sector >>= 8;
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
		ah->lba4 = sector >>= 8;
		ah->lba5 = sector >>= 8;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}

	if (bio_data_dir(buf->bio) == WRITE) {
		ah->aflags |= AOEAFL_WRITE;
		f->writedatalen = bcnt;
	} else {
		writebit = 0;
		f->writedatalen = 0;
	}

	/* WIN_READ plus the write and ext bits yields the matching
	 * read/write, 28/48-bit ATA opcode
	 */
	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		buf->bv++;
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = d->skblist;
		d->skblist = skb;
	}
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
	struct frame *f;
	struct buf *buf;
loop:
	f = getframe(d, FREETAG);
	if (f == NULL)
		return;
	if (d->inprocess == NULL) {
		if (list_empty(&d->bufq))
			return;
		buf = container_of(d->bufq.next, struct buf, bufs);
		list_del(d->bufq.next);
/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
		d->inprocess = buf;
	}
	aoecmd_ata_rw(d, f);
	goto loop;
}

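/*
 * Retransmit a frame under a new tag: log the old and new tags through
 * the aoe character device, rewrite the on-wire tag, and queue a fresh
 * copy of the frame on d->skblist.
 */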
static void
rexmit(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
	char buf[128];
	u32 n;
	u32 net_tag;

	n = newtag(d);

	snprintf(buf, sizeof buf,
		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
		"retransmit",
		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
	aoechr_error(buf);

	h = (struct aoe_hdr *) f->data;
	f->tag = n;
	net_tag = __cpu_to_be32(n);
	memcpy(h->tag, &net_tag, sizeof net_tag);

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = d->skblist;
		d->skblist = skb;
	}
}

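/*
 * Ticks elapsed since the frame's tag was stamped: the low 16 bits of
 * the tag hold the jiffies value at transmit time, so subtract and
 * correct for 16-bit wraparound.
 */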
static int
tsince(int tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return n;
}

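/*
 * Per-device timer, rearmed every TIMERTICK.  Any outstanding frame
 * older than roughly 1.5 times the RTT average is retransmitted; a
 * frame that has waited more than MAXWAIT seconds fails the whole
 * device.  When retransmits were queued, the RTT average is doubled
 * (capped at MAXTIMER) to back off.
 */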
static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
tdie:		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;
			n /= HZ;
			if (n > MAXWAIT) { /* waited too long. device failure. */
				aoedev_downdev(d);
				goto tdie;
			}
			rexmit(d, f);
		}
	}

	sl = d->skblist;
	d->skblist = NULL;
	if (sl) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

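/*
 * Parse the 512-byte ATA IDENTIFY data (little-endian 16-bit words) to
 * choose between LBA48 and LBA28 addressing, record the capacity and a
 * CHS geometry, and schedule gendisk allocation the first time the
 * device comes up.
 */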
static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = __le16_to_cpu(*((u16 *) &id[83<<1]));

	/* word 86: command set/feature enabled */
	n |= __le16_to_cpu(*((u16 *) &id[86<<1]));

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
		d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
		d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
	}
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->gd != NULL) {
		d->gd->capacity = ssize;
		d->flags |= DEVFL_UP;
		return;
	}
	if (d->flags & DEVFL_WORKON) {
		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
			"(This really shouldn't happen).\n");
		return;
	}
	INIT_WORK(&d->work, aoeblk_gdalloc, d);
	schedule_work(&d->work);
	d->flags |= DEVFL_WORKON;
}

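/*
 * Fold one round-trip sample into the moving average, clamping the
 * sample to the [MINTIMER, MAXTIMER] range first.
 */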
static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
	if (n < MINTIMER)
		n = MINTIMER;
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
	n -= d->rttavg;
	d->rttavg += n >> 2;
}

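/*
 * Handle an incoming ATA response.  Look up the device by source MAC
 * and the frame by tag, update the RTT average, copy read data or
 * complete an IDENTIFY, end the bio once every frame for the buffer
 * has returned, then free the frame and push out any new work.
 */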
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];

	hin = (struct aoe_hdr *) skb->mac.raw;
	d = aoedev_bymac(hin->src);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			__be16_to_cpu(*((u16 *) hin->major)),
			hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
	if (f == NULL) {
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d tag=%08x@%08lx\n",
			"unexpected rsp",
			__be16_to_cpu(*((u16 *) hin->major)),
			hin->minor,
			__be32_to_cpu(*((u32 *) hin->tag)),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
			"stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			n = ahout->scnt << 9;
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
					"ata data size in read. skb->len=%d\n",
					skb->len);
				/* fail frame f? just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
			/* fall through */
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
					"in ataid. skb->len=%d\n", skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (char *) (ahin+1));
			/* d->flags |= DEVFL_WC_UPDATE; */
			break;
		default:
			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
				"outbound ata command %2.2Xh for %d.%d\n",
				ahout->cmdstat,
				__be16_to_cpu(*((u16 *) hin->major)),
				hin->minor);
		}
	}

	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, buf->bio->bi_size, n);
			mempool_free(buf, d->bufpool);
		}
	}

	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);

	sl = d->skblist;
	d->skblist = NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

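/*
 * Broadcast an AoE config query for the given major and minor address
 * on every network interface accepted by is_aoe_netif().  Responses
 * come back through aoecmd_cfg_rsp().
 */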
void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl;
	struct net_device *ifp;
	u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
	u16 net_aoemajor = __cpu_to_be16(aoemajor);

	sl = NULL;

	read_lock(&dev_base_lock);
	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(ifp, sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
			continue;
		}
		h = (struct aoe_hdr *) skb->mac.raw;
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		memcpy(h->type, &aoe_type, sizeof aoe_type);
		h->verfl = AOE_HVER;
		memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	aoenet_xmit(sl);
}

/*
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb. Usually we'd chain it up to the d->skblist.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;

	f = getframe(d, FREETAG);
	if (f == NULL) {
		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
			"This shouldn't happen.\n");
		return NULL;
	}

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->writedatalen = 0;

	/* this message initializes the device, so we reset the rttavg */
	d->rttavg = MAXTIMER;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = WIN_IDENTIFY;
	ah->lba3 = 0xa0;

	skb = skb_prepare(d, f);

	/* we now want to start the rexmit tracking */
	d->flags &= ~DEVFL_TKILL;
	d->timer.data = (ulong) d;
	d->timer.function = rexmit_timer;
	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	return skb;
}

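/*
 * Handle a config query response: reject the all-ones shelf address,
 * map the AoE address to a system minor, create or look up the aoedev,
 * and for a device that is not yet up send an ATA IDENTIFY to finish
 * bringing it online.
 */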
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	ulong flags, bufcnt, sysminor, aoemajor;
	struct sk_buff *sl;
	enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };

	h = (struct aoe_hdr *) skb->mac.raw;
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = __be16_to_cpu(*((u16 *) h->major));
	if (aoemajor == 0xfff) {
		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
			"address is all ones. Check shelf dip switches\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor > MAXSYSMINOR) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
			"large\n", sysminor);
		return;
	}

	bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
		bufcnt = MAXFRAMES;

	d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
	if (d == NULL) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}

	d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));

	/* we get here only if the device is new */
	sl = aoecmd_ata_id(d);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}