diff --git a/EtherDrive-2.6-HOWTO.sgml b/EtherDrive-2.6-HOWTO.sgml index 6ed3a8f..69e064a 100644 --- a/EtherDrive-2.6-HOWTO.sgml +++ b/EtherDrive-2.6-HOWTO.sgml @@ -1060,8 +1060,12 @@ non-filesystem I/O, the complexity associated with coalescing multiple I/O jobs in the aoe driver is probably not worth the potential driver instability it could introduce. -There is a straightforward way, however, to work around this issue by -using a trivial md device as a wrapper. (Almost everyone uses a +One way to work around this issue is to pass the O_DIRECT flag to +the "open" system call. For recent versions of dd, you can use the +option "oflag=direct" to tell dd to use this O_DIRECT flag. + +Another way to work around this issue is to +use a trivial md device as a wrapper. (Almost everyone uses a filesystem. This technique is only interesting to those who are not using a filesystem, so most people should ignore this idea.) In the example below, a single-disk RAID 0 is created for the AoE device @@ -1082,7 +1086,6 @@ makki:~# cat /sys/block/md1/size 209715072 - Q: How can I boot diskless systems from my Coraid EtherDrive devices?

diff --git a/Makefile b/Makefile index c85d5c2..e5f2e5d 100644 --- a/Makefile +++ b/Makefile @@ -127,11 +127,17 @@ install: install_nodev # experts can put the driver in a kernel source tree with # "make kerninst" -kerninst: default +kerninst: conf/done mak/src-id.ts test -d ${KDIR}/drivers/block/aoe cp -b ${DRIVER_D}/*.[ch] ${KDIR}/drivers/block/aoe echo "#define AOE_PARTITIONS (${AOE_PARTITIONS})" \ >> ${KDIR}/drivers/block/aoe/aoe.h + echo '#define DEVSUBDIR "${DEVSUBDIR}"' \ + >> ${KDIR}/drivers/block/aoe/aoe.h + echo '#define AOE_DYNDEVS (${DYNDEVS})' \ + >> ${KDIR}/drivers/block/aoe/aoe.h + echo 'aoe-objs += aoedbg.o' \ + >> ${KDIR}/drivers/block/aoe/Makefile uninstall: @echo "Removing Module from $(INSTDIR)" diff --git a/NEWS b/NEWS index fb40cc1..48c0276 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,7 @@ -*- change-log -*- +2009-02-06 Ed Cashin + fix the "kerninst" target for make, which had atrophied + 2009-02-03 Ed Cashin fix and refine compatibility tests version-70 diff --git a/conf/25-patches/fs5-none.diff b/conf/25-patches/fs5-none.diff index a6a02d4..3da48c9 100644 --- a/conf/25-patches/fs5-none.diff +++ b/conf/25-patches/fs5-none.diff @@ -1,7 +1,7 @@ diff -upr fs5/linux/drivers/block/aoe/aoedbg.c none/linux/drivers/block/aoe/aoedbg.c --- fs5/linux/drivers/block/aoe/aoedbg.c 2008-10-17 11:35:32.000000000 -0400 +++ none/linux/drivers/block/aoe/aoedbg.c 2008-10-17 11:51:51.000000000 -0400 -@@ -1,11 +1,58 @@ +@@ -1,11 +1,59 @@ #include #include -#include @@ -10,6 +10,7 @@ diff -upr fs5/linux/drivers/block/aoe/aoedbg.c none/linux/drivers/block/aoe/aoed +#include +#include +#include ++#include #include "aoe.h" #define SUBBUF_SIZE 262144 diff --git a/conf/25-patches/none-relay5.diff b/conf/25-patches/none-relay5.diff index 54a787c..b3155ba 100644 --- a/conf/25-patches/none-relay5.diff +++ b/conf/25-patches/none-relay5.diff @@ -1,7 +1,7 @@ diff -upr none/linux/drivers/block/aoe/aoedbg.c relay5/linux/drivers/block/aoe/aoedbg.c --- 
none/linux/drivers/block/aoe/aoedbg.c 2008-10-17 11:51:51.000000000 -0400 +++ relay5/linux/drivers/block/aoe/aoedbg.c 2008-10-17 11:10:02.000000000 -0400 -@@ -1,58 +1,12 @@ +@@ -1,59 +1,12 @@ #include #include -#include @@ -9,6 +9,7 @@ diff -upr none/linux/drivers/block/aoe/aoedbg.c relay5/linux/drivers/block/aoe/a -#include -#include -#include +-#include +#include +#include #include "aoe.h" diff --git a/conf/25-patches/none-relay6.diff b/conf/25-patches/none-relay6.diff index 3f0a1ba..dfbd28c 100644 --- a/conf/25-patches/none-relay6.diff +++ b/conf/25-patches/none-relay6.diff @@ -1,7 +1,7 @@ diff -upr none/linux/drivers/block/aoe/aoedbg.c relay6/linux/drivers/block/aoe/aoedbg.c --- none/linux/drivers/block/aoe/aoedbg.c 2008-10-17 11:51:51.000000000 -0400 +++ relay6/linux/drivers/block/aoe/aoedbg.c 2008-10-17 09:37:31.000000000 -0400 -@@ -1,58 +1,12 @@ +@@ -1,59 +1,12 @@ #include #include -#include @@ -9,6 +9,7 @@ diff -upr none/linux/drivers/block/aoe/aoedbg.c relay6/linux/drivers/block/aoe/a -#include -#include -#include +-#include +#include +#include #include "aoe.h" diff --git a/conf/8-patches/old-new.diff b/conf/8-patches/old-new.diff index 1830d2b..27514be 100644 --- a/conf/8-patches/old-new.diff +++ b/conf/8-patches/old-new.diff @@ -1,17 +1,22 @@ -diff -upr b/linux/drivers/block/aoe/aoe.h a/linux/drivers/block/aoe/aoe.h ---- b/linux/drivers/block/aoe/aoe.h 2007-01-23 14:14:30.000000000 -0500 -+++ a/linux/drivers/block/aoe/aoe.h 2007-01-23 13:41:13.000000000 -0500 -@@ -213,6 +213,3 @@ unsigned long long mac_addr(char addr[6] - /* for compatibility with older 2.6 kernels lacking kcalloc +diff --git a/linux/drivers/block/aoe/aoe.h b/linux/drivers/block/aoe/aoe.h +index ece38d3..dc3be54 100644 +--- a/linux/drivers/block/aoe/aoe.h ++++ b/linux/drivers/block/aoe/aoe.h +@@ -253,9 +253,6 @@ unsigned long long mac_addr(char addr[6]); */ extern void *aoe_kcalloc(size_t, size_t, int); -- + -/* compatibility with pre-2.6.9 kernels */ -unsigned long 
msleep_interruptible(unsigned int msecs); -diff -upr b/linux/drivers/block/aoe/aoechr.c a/linux/drivers/block/aoe/aoechr.c ---- b/linux/drivers/block/aoe/aoechr.c 2007-01-23 14:13:18.000000000 -0500 -+++ a/linux/drivers/block/aoe/aoechr.c 2006-10-31 15:59:40.000000000 -0500 -@@ -65,47 +65,6 @@ interfaces(const char __user *str, size_ +- + #define AOEDBG_ACTIVE 0 + void __init aoedbg_init(void); + void aoedbg_print(char *fmt, ...); +diff --git a/linux/drivers/block/aoe/aoechr.c b/linux/drivers/block/aoe/aoechr.c +index 3eb619b..b542143 100644 +--- a/linux/drivers/block/aoe/aoechr.c ++++ b/linux/drivers/block/aoe/aoechr.c +@@ -72,47 +72,6 @@ interfaces(const char __user *str, size_t size) return 0; } diff --git a/linux/Documentation/aoe/aoe.txt b/linux/Documentation/aoe/aoe.txt index 4c40d50..01739ba 100644 --- a/linux/Documentation/aoe/aoe.txt +++ b/linux/Documentation/aoe/aoe.txt @@ -122,7 +122,9 @@ DRIVER OPTIONS The aoe_deadsecs module parameter determines the maximum number of seconds that the driver will wait for an AoE device to provide a response to an AoE command. After aoe_deadsecs seconds have - elapsed, the AoE device will be marked as "down". + elapsed, the AoE device will be marked as "down". A value of zero + is supported for testing purposes and makes the aoe driver keep + trying AoE commands forever. The aoe_maxout module parameter has a default of 128. This is the maximum number of unresponded packets that will be sent to an AoE diff --git a/linux/drivers/block/aoe/aoe.h b/linux/drivers/block/aoe/aoe.h index abb15af..0471431 100644 --- a/linux/drivers/block/aoe/aoe.h +++ b/linux/drivers/block/aoe/aoe.h @@ -1,5 +1,5 @@ /* Copyright (c) 2008 Coraid, Inc. See COPYING for GPL terms. 
*/ -#define VERSION "70" +#define VERSION "70zds-tgtrtt-sticky" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -143,6 +143,8 @@ struct aoetgt { struct list_head ffree; /* list of free frames */ struct aoeif ifs[NAOEIFS]; struct aoeif *ifp; /* current aoeif in use */ + u32 rttavg; /* scaled AoE round trip time average */ + u32 rttdev; /* scaled round trip time mean deviation */ ushort nout; ushort maxout; /* current value for max outstanding */ ushort next_cwnd; /* incr maxout after decrementing to zero */ @@ -158,8 +160,6 @@ struct aoedev { struct aoedev *next; ulong sysminor; ulong aoemajor; - u32 rttavg; /* scaled AoE round trip time average */ - u32 rttdev; /* scaled round trip time mean deviation */ u16 aoeminor; u16 flags; u16 nopen; /* (bd_openers isn't available without sleeping) */ diff --git a/linux/drivers/block/aoe/aoeblk.c b/linux/drivers/block/aoe/aoeblk.c index 677584e..8c2080e 100644 --- a/linux/drivers/block/aoe/aoeblk.c +++ b/linux/drivers/block/aoe/aoeblk.c @@ -126,7 +126,6 @@ static ssize_t aoedisk_show_debug(struct device *dev, #define so(arg...) 
snprintf(p, PAGE_SIZE - (p-page), ## arg) p = page; - p += so("rttavg: %d rttdev: %d\n", d->rttavg, d->rttdev); p += so("nskbpool: %d\n", d->nskbpool); p += so("kicked: %ld\n", d->kicked); p += so("maxbcnt: %ld\n", d->maxbcnt); @@ -142,6 +141,8 @@ static ssize_t aoedisk_show_debug(struct device *dev, p += so("ffree: %p\n", list_head_el(&(*t)->ffree)); p += so("%012llx:%d:%d:%d\n", mac_addr((*t)->addr), (*t)->nout, (*t)->maxout, (*t)->nframes); + p += so("\trttavg: %d rttdev: %d\n", + (*t)->rttavg, (*t)->rttdev); p += so("\tssthresh:%d\n", (*t)->ssthresh); p += so("\tlost:%lu\n", (*t)->lost); p += so("\ttaint:%d\n", (*t)->taint); diff --git a/linux/drivers/block/aoe/aoecmd.c b/linux/drivers/block/aoe/aoecmd.c index d8eb28d..651aed5 100644 --- a/linux/drivers/block/aoe/aoecmd.c +++ b/linux/drivers/block/aoe/aoecmd.c @@ -667,16 +667,16 @@ probe(struct aoetgt *t) } static long -rto(struct aoedev *d) +rto(struct aoetgt *t) { - long t; + long n; - t = 2 * d->rttavg >> RTTSCALE; - t += 8 * d->rttdev >> RTTDSCALE; - if (t == 0) - t = 1; + n = 2 * t->rttavg >> RTTSCALE; + n += 8 * t->rttdev >> RTTDSCALE; + if (n == 0) + n = 1; - return t; + return n; } static void @@ -703,7 +703,7 @@ rexmit_deferred(struct aoedev *d) f = nf; t = f->t; } - } else if (tsince_hr(f) < t->taint * rto(d)) { + } else if (tsince_hr(f) < t->taint * rto(t)) { /* reprobe more slowly when taint is high */ continue; } @@ -760,7 +760,6 @@ rexmit_timer(ulong vp) struct list_head *head, *pos, *nx; LIST_HEAD(flist); struct sk_buff *sl; - register long timeout; ulong flags, n; int i; int utgts; /* number of aoetgt descriptors (not slots) */ @@ -771,8 +770,6 @@ rexmit_timer(ulong vp) spin_lock_irqsave(&d->lock, flags); - /* timeout based on observed timings and variations */ - timeout = rto(d); utgts = count_targets(d); if (d->flags & DEVFL_TKILL) { @@ -785,7 +782,7 @@ rexmit_timer(ulong vp) head = &d->factive[i]; list_for_each_safe(pos, nx, head) { f = list_entry(pos, struct frame, head); - if (tsince_hr(f) 
< timeout) // end of expired frames + if (tsince_hr(f) < rto(f->t)) // end of expired frames break; // move to flist for later processing list_move_tail(pos, &flist); @@ -798,7 +795,9 @@ rexmit_timer(ulong vp) since = tsince_hr(f); n = f->waited_total + since; n /= USEC_PER_SEC; - if (n > aoe_deadsecs && !(f->flags & FFL_PROBE)) { + if (aoe_deadsecs + && n > aoe_deadsecs + && !(f->flags & FFL_PROBE)) { /* We have waited too long retransmitting, and it's time to fail the device. @@ -817,7 +816,7 @@ rexmit_timer(ulong vp) t = f->t; n = f->waited + since; n /= USEC_PER_SEC; - if (utgts > 0 && n > aoe_deadsecs / utgts) + if (aoe_deadsecs && utgts > 0 && n > aoe_deadsecs / utgts) scorn(t); /* avoid this target */ t->lost += 1; /* (decremented on unexpected response) */ @@ -1035,30 +1034,31 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) } static void -calc_rttavg(struct aoedev *d, struct aoe_hdr *h, - struct frame *f, /* for tracing RTTs */ - struct aoetgt *t, int rtt) +calc_rttavg(struct aoetgt *t, int rtt) { register long n; n = rtt; - aoedbg_rtt(d, h, f, rtt); /* cf. 
Congestion Avoidance and Control, Jacobson & Karels, 1988 */ - n -= d->rttavg >> RTTSCALE; - d->rttavg += n; + n -= t->rttavg >> RTTSCALE; + t->rttavg += n; if (n < 0) n = -n; - n -= d->rttdev >> RTTDSCALE; - d->rttdev += n; + n -= t->rttdev >> RTTDSCALE; + t->rttdev += n; +} - if (!t || t->maxout >= t->nframes) +static void +update_cong(struct aoetgt *t) +{ + if (t->maxout >= t->nframes) return; - if (t->maxout < t->ssthresh) + if (t->nout == t->maxout && t->next_cwnd-- == 0) { t->maxout += 1; - else if (t->nout == t->maxout && t->next_cwnd-- == 0) { - t->maxout += 1; - t->next_cwnd = t->maxout; + t->next_cwnd = t->nframes; + if (t->maxout < 10) + t->next_cwnd *= 2; } } @@ -1342,15 +1342,17 @@ aoecmd_ata_rsp(struct sk_buff *skb) n = be32_to_cpu(get_unaligned(&h->tag)); f = getframe(d, n); if (f) { - calc_rttavg(d, h, f, f->t, tsince_hr(f)); + calc_rttavg(f->t, tsince_hr(f)); + update_cong(f->t); f->t->nout--; } else if ((f = getframe_deferred(d, n))) { - calc_rttavg(d, h, f, NULL, tsince_hr(f)); + calc_rttavg(f->t, tsince_hr(f)); } else { - calc_rttavg(d, h, NULL, NULL, tsince(n)); t = gettgt(d, h->src); - if (t) + if (t) { t->lost -= 1; /* packet wasn't lost, just late */ + calc_rttavg(t, tsince(n)); + } spin_unlock_irqrestore(&d->lock, flags); aoedev_put(d); snprintf(ebuf, sizeof ebuf, @@ -1422,8 +1424,6 @@ aoecmd_ata_id(struct aoedev *d) skb->dev = t->ifp->nd; - d->rttavg = RTTAVG_INIT; - d->rttdev = RTTDEV_INIT; d->timer.function = rexmit_timer; skb = skb_clone(skb, GFP_ATOMIC); @@ -1623,6 +1623,8 @@ aoecmd_wreset(struct aoetgt *t) t->maxout = 1; t->ssthresh = t->nframes / 2; t->next_cwnd = t->nframes; + t->rttavg = RTTAVG_INIT; + t->rttdev = RTTDEV_INIT; } void @@ -1630,8 +1632,6 @@ aoecmd_cleanslate(struct aoedev *d) { struct aoetgt **t, **te; - d->rttavg = RTTAVG_INIT; - d->rttdev = RTTDEV_INIT; d->maxbcnt = 0; t = d->targets; diff --git a/linux/drivers/block/aoe/aoedbg.c b/linux/drivers/block/aoe/aoedbg.c index 57910da..1748bea 100644 --- 
a/linux/drivers/block/aoe/aoedbg.c +++ b/linux/drivers/block/aoe/aoedbg.c @@ -226,12 +226,12 @@ aoedbg_rtt(struct aoedev *d, struct aoe_hdr *h, struct frame *f, int rtt) do_gettimeofday(&now); r.reltime = now.tv_usec - start.tv_usec; r.reltime += (now.tv_sec - start.tv_sec) * USEC_PER_SEC; - r.rttavg = d->rttavg >> RTTSCALE; - r.rttdev = d->rttdev >> RTTDSCALE; if (!f) { r.flags |= R_ORPHAN; } else { r.send_nout = f->nout; + r.rttavg = f->t->rttavg >> RTTSCALE; + r.rttdev = f->t->rttdev >> RTTDSCALE; r.rcv_nout = f->t->nout; r.ssthresh = f->t->ssthresh; r.cwnd = f->t->maxout; diff --git a/linux/drivers/block/aoe/aoedev.c b/linux/drivers/block/aoe/aoedev.c index ab83022..f073940 100644 --- a/linux/drivers/block/aoe/aoedev.c +++ b/linux/drivers/block/aoe/aoedev.c @@ -486,8 +486,6 @@ aoedev_by_aoeaddr(ulong maj, int min, int malloc) d->sysminor = sysminor; d->aoemajor = maj; d->aoeminor = min; - d->rttavg = RTTAVG_INIT; - d->rttdev = RTTDEV_INIT; d->next = devlist; devlist = d;