diff options
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_driver.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_driver.c | 1041 |
1 files changed, 718 insertions, 323 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index ca4d0acc6786..e0a64f070b97 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,7 +41,6 @@ #include "ipath_kernel.h" #include "ipath_verbs.h" -#include "ipath_common.h" static void ipath_update_pio_bufs(struct ipath_devdata *); @@ -73,10 +72,27 @@ module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(debug, "mask for debug prints"); EXPORT_SYMBOL_GPL(ipath_debug); +unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */ +module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); +MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); + +static unsigned ipath_hol_timeout_ms = 13000; +module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); +MODULE_PARM_DESC(hol_timeout_ms, + "duration of user app suspension after link failure"); + +unsigned ipath_linkrecovery = 1; +module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); + MODULE_LICENSE("GPL"); -MODULE_AUTHOR("QLogic <support@pathscale.com>"); +MODULE_AUTHOR("QLogic <support@qlogic.com>"); MODULE_DESCRIPTION("QLogic InfiniPath driver"); +/* + * Table to translate the LINKTRAININGSTATE portion of + * IBCStatus to a human-readable form. + */ const char *ipath_ibcstatus_str[] = { "Disabled", "LinkUp", @@ -91,9 +107,20 @@ const char *ipath_ibcstatus_str[] = { "CfgWaitRmt", "CfgIdle", "RecovRetrain", - "LState0xD", /* unused */ + "CfgTxRevLane", /* unused before IBA7220 */ "RecovWaitRmt", "RecovIdle", + /* below were added for IBA7220 */ + "CfgEnhanced", + "CfgTest", + "CfgWaitRmtTest", + "CfgWaitCfgEnhanced", + "SendTS_T", + "SendTstIdles", + "RcvTS_T", + "SendTst_TS1s", + "LTState18", "LTState19", "LTState1A", "LTState1B", + "LTState1C", "LTState1D", "LTState1E", "LTState1F" }; static void __devexit ipath_remove_one(struct pci_dev *); @@ -102,8 +129,10 @@ static int __devinit ipath_init_one(struct pci_dev *, /* Only needed for registration, nothing else needs this info */ #define PCI_VENDOR_ID_PATHSCALE 0x1fc1 +#define PCI_VENDOR_ID_QLOGIC 0x1077 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 +#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220 /* Number of seconds before our card status check... */ #define STATUS_TIMEOUT 60 @@ -111,6 +140,7 @@ static int __devinit ipath_init_one(struct pci_dev *, static const struct pci_device_id ipath_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) }, { 0, } }; @@ -126,19 +156,6 @@ static struct pci_driver ipath_driver = { }, }; -static void ipath_check_status(struct work_struct *work) -{ - struct ipath_devdata *dd = container_of(work, struct ipath_devdata, - status_work.work); - - /* - * If we don't have any interrupts, let the user know and - * don't bother checking again. - */ - if (dd->ipath_int_counter == 0) - dev_err(&dd->pcidev->dev, "No interrupts detected.\n"); -} - static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, u32 *bar0, u32 *bar1) { @@ -206,8 +223,6 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - INIT_DELAYED_WORK(&dd->status_work, ipath_check_status); - list_add(&dd->ipath_list, &ipath_dev_list); bail_unlock: @@ -234,12 +249,12 @@ struct ipath_devdata *ipath_lookup(int unit) return dd; } -int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp) +int ipath_count_units(int *npresentp, int *nupp, int *maxportsp) { int nunits, npresent, nup; struct ipath_devdata *dd; unsigned long flags; - u32 maxports; + int maxports; nunits = npresent = nup = maxports = 0; @@ -304,7 +319,7 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd) u32 *addr; u64 msecs, emsecs; - piobuf = ipath_getpiobuf(dd, &pbnum); + piobuf = ipath_getpiobuf(dd, 0, &pbnum); if (!piobuf) { dev_info(&dd->pcidev->dev, "No PIObufs for checking perf, skipping\n"); @@ -336,7 +351,14 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd) ipath_disable_armlaunch(dd); - writeq(0, piobuf); /* length 0, no dwords actually sent */ + /* + * length 0, no dwords actually sent, and mark as VL15 + * on chips where that may matter (due to IB flowcontrol) + */ + if ((dd->ipath_flags & IPATH_HAS_PBC_CNT)) + writeq(1UL << 63, piobuf); + else + writeq(0, piobuf); ipath_flush_wc(); /* @@ -377,6 +399,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, struct ipath_devdata *dd; unsigned long long addr; u32 bar0 = 0, bar1 = 0; + u8 rev; dd = ipath_alloc_devdata(pdev); if (IS_ERR(dd)) { @@ -408,7 +431,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, } addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); - ipath_cdbg(VERBOSE, "regbase (0) %llx len %d pdev->irq %d, vend %x/%x " + ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x " "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, ent->device, ent->driver_data); @@ -512,6 +535,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, "CONFIG_PCI_MSI is not enabled\n", ent->device); return -ENODEV; #endif + case PCI_DEVICE_ID_INFINIPATH_7220: +#ifndef CONFIG_PCI_MSI + ipath_dbg("CONFIG_PCI_MSI is not enabled, " + "using IntX for unit %u\n", dd->ipath_unit); +#endif + ipath_init_iba7220_funcs(dd); + break; default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " "failing\n", ent->device); @@ -533,7 +563,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail_regions; } - dd->ipath_pcirev = pdev->revision; + ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); + if (ret) { + ipath_dev_err(dd, "Failed to read PCI revision ID unit " + "%u: err %d\n", dd->ipath_unit, -ret); + goto bail_regions; /* shouldn't ever happen */ + } + dd->ipath_pcirev = rev; #if defined(__powerpc__) /* There isn't a generic way to specify writethrough mappings */ @@ -556,14 +592,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n", addr, dd->ipath_kregbase); - /* - * clear ipath_flags here instead of in ipath_init_chip as it is set - * by ipath_setup_htconfig. - */ - dd->ipath_flags = 0; - dd->ipath_lli_counter = 0; - dd->ipath_lli_errors = 0; - if (dd->ipath_f_bus(dd, pdev)) ipath_dev_err(dd, "Failed to setup config space; " "continuing anyway\n"); @@ -608,13 +636,11 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipath_diag_add(dd); ipath_register_ib_device(dd); - /* Check that card status in STATUS_TIMEOUT seconds. */ - schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT); - goto bail; bail_irqsetup: - if (pdev->irq) free_irq(pdev->irq, dd); + if (pdev->irq) + free_irq(pdev->irq, dd); bail_iounmap: iounmap((volatile void __iomem *) dd->ipath_kregbase); @@ -654,6 +680,10 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) ipath_disable_wc(dd); } + if (dd->ipath_spectriggerhit) + dev_info(&dd->pcidev->dev, "%lu special trigger hits\n", + dd->ipath_spectriggerhit); + if (dd->ipath_pioavailregs_dma) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, (void *) dd->ipath_pioavailregs_dma, @@ -706,6 +736,8 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) tmpp = dd->ipath_pageshadow; dd->ipath_pageshadow = NULL; vfree(tmpp); + + dd->ipath_egrtidbase = NULL; } /* @@ -738,7 +770,6 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) */ ipath_shutdown_device(dd); - cancel_delayed_work(&dd->status_work); flush_scheduled_work(); if (dd->verbs_dev) @@ -823,20 +854,8 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } - - /* - * Disable PIOAVAILUPD, then re-enable, reading scratch in - * between. This seems to avoid a chip timing race that causes - * pioavail updates to memory to stop. We xor as we don't - * know the state of the bit when we're called. - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl ^ INFINIPATH_S_PIOBUFAVAILUPD); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + /* on some older chips, update may not happen after cancel */ + ipath_force_pio_avail_update(dd); } /** @@ -873,18 +892,52 @@ int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) (unsigned long long) ipath_read_kreg64( dd, dd->ipath_kregs->kr_ibcctrl), (unsigned long long) val, - ipath_ibcstatus_str[val & 0xf]); + ipath_ibcstatus_str[val & dd->ibcs_lts_mask]); } return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; } +static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, + char *buf, size_t blen) +{ + static const struct { + ipath_err_t err; + const char *msg; + } errs[] = { + { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, + { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, + { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, + { INFINIPATH_E_SDMABASE, "SDmaBase" }, + { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, + { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, + { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, + { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, + { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, + { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, + { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, + { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, + }; + int i; + int expected; + size_t bidx = 0; + + for (i = 0; i < ARRAY_SIZE(errs); i++) { + expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 : + test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + if ((err & errs[i].err) && !expected) + bidx += snprintf(buf + bidx, blen - bidx, + "%s ", errs[i].msg); + } +} + /* * Decode the error status into strings, deciding whether to always * print * it or not depending on "normal packet errors" vs everything * else. Return 1 if "real" errors, otherwise 0 if only packet * errors, so caller can decide what to print with the string. */ -int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) +int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, + ipath_err_t err) { int iserr = 1; *buf = '\0'; @@ -922,6 +975,8 @@ int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "rbadversion ", blen); if (err & INFINIPATH_E_RHDR) strlcat(buf, "rhdr ", blen); + if (err & INFINIPATH_E_SENDSPECIALTRIGGER) + strlcat(buf, "sendspecialtrigger ", blen); if (err & INFINIPATH_E_RLONGPKTLEN) strlcat(buf, "rlongpktlen ", blen); if (err & INFINIPATH_E_RMAXPKTLEN) @@ -964,6 +1019,10 @@ int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "hardware ", blen); if (err & INFINIPATH_E_RESET) strlcat(buf, "reset ", blen); + if (err & INFINIPATH_E_SDMAERRS) + decode_sdma_errs(dd, err, buf, blen); + if (err & INFINIPATH_E_INVALIDEEPCMD) + strlcat(buf, "invalideepromcmd ", blen); done: return iserr; } @@ -1076,18 +1135,17 @@ static void ipath_rcv_hdrerr(struct ipath_devdata *dd, u32 eflags, u32 l, u32 etail, - u64 *rc) + __le32 *rhf_addr, + struct ipath_message_header *hdr) { char emsg[128]; - struct ipath_message_header *hdr; get_rhf_errstring(eflags, emsg, sizeof emsg); - hdr = (struct ipath_message_header *)&rc[1]; ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " "tlen=%x opcode=%x egridx=%x: %s\n", eflags, l, - ipath_hdrget_rcv_type((__le32 *) rc), - ipath_hdrget_length_in_bytes((__le32 *) rc), + ipath_hdrget_rcv_type(rhf_addr), + ipath_hdrget_length_in_bytes(rhf_addr), be32_to_cpu(hdr->bth[0]) >> 24, etail, emsg); @@ -1112,55 +1170,52 @@ static void ipath_rcv_hdrerr(struct ipath_devdata *dd, */ void ipath_kreceive(struct ipath_portdata *pd) { - u64 *rc; struct ipath_devdata *dd = pd->port_dd; + __le32 *rhf_addr; void *ebuf; const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ u32 etail = -1, l, hdrqtail; struct ipath_message_header *hdr; - u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0; + u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0; static u64 totcalls; /* stats, may eventually remove */ - - if (!dd->ipath_hdrqtailptr) { - ipath_dev_err(dd, - "hdrqtailptr not set, can't do receives\n"); - goto bail; - } + int last; l = pd->port_head; - hdrqtail = ipath_get_rcvhdrtail(pd); - if (l == hdrqtail) - goto bail; - -reloop: - for (i = 0; l != hdrqtail; i++) { - u32 qp; - u8 *bthbytes; + rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset; + if (dd->ipath_flags & IPATH_NODMA_RTAIL) { + u32 seq = ipath_hdrget_seq(rhf_addr); - rc = (u64 *) (pd->port_rcvhdrq + (l << 2)); - hdr = (struct ipath_message_header *)&rc[1]; - /* - * could make a network order version of IPATH_KD_QP, and - * do the obvious shift before masking to speed this up. - */ - qp = ntohl(hdr->bth[1]) & 0xffffff; - bthbytes = (u8 *) hdr->bth; + if (seq != pd->port_seq_cnt) + goto bail; + hdrqtail = 0; + } else { + hdrqtail = ipath_get_rcvhdrtail(pd); + if (l == hdrqtail) + goto bail; + smp_rmb(); + } - eflags = ipath_hdrget_err_flags((__le32 *) rc); - etype = ipath_hdrget_rcv_type((__le32 *) rc); +reloop: + for (last = 0, i = 1; !last; i++) { + hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); + eflags = ipath_hdrget_err_flags(rhf_addr); + etype = ipath_hdrget_rcv_type(rhf_addr); /* total length */ - tlen = ipath_hdrget_length_in_bytes((__le32 *) rc); + tlen = ipath_hdrget_length_in_bytes(rhf_addr); ebuf = NULL; - if (etype != RCVHQ_RCV_TYPE_EXPECTED) { + if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ? + ipath_hdrget_use_egr_buf(rhf_addr) : + (etype != RCVHQ_RCV_TYPE_EXPECTED)) { /* - * it turns out that the chips uses an eager buffer + * It turns out that the chip uses an eager buffer * for all non-expected packets, whether it "needs" * one or not. So always get the index, but don't * set ebuf (so we try to copy data) unless the * length requires it. */ - etail = ipath_hdrget_index((__le32 *) rc); + etail = ipath_hdrget_index(rhf_addr); + updegr = 1; if (tlen > sizeof(*hdr) || etype == RCVHQ_RCV_TYPE_NON_KD) ebuf = ipath_get_egrbuf(dd, etail); @@ -1171,75 +1226,91 @@ reloop: * packets; only ipathhdrerr should be set. */ - if (etype != RCVHQ_RCV_TYPE_NON_KD && etype != - RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver( - hdr->iph.ver_port_tid_offset) != - IPS_PROTO_VERSION) { + if (etype != RCVHQ_RCV_TYPE_NON_KD && + etype != RCVHQ_RCV_TYPE_ERROR && + ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) != + IPS_PROTO_VERSION) ipath_cdbg(PKT, "Bad InfiniPath protocol version " "%x\n", etype); - } if (unlikely(eflags)) - ipath_rcv_hdrerr(dd, eflags, l, etail, rc); + ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr); else if (etype == RCVHQ_RCV_TYPE_NON_KD) { - ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen); + ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen); if (dd->ipath_lli_counter) dd->ipath_lli_counter--; + } else if (etype == RCVHQ_RCV_TYPE_EAGER) { + u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24; + u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff; ipath_cdbg(PKT, "typ %x, opcode %x (eager, " "qp=%x), len %x; ignored\n", - etype, bthbytes[0], qp, tlen); + etype, opcode, qp, tlen); } - else if (etype == RCVHQ_RCV_TYPE_EAGER) - ipath_cdbg(PKT, "typ %x, opcode %x (eager, " - "qp=%x), len %x; ignored\n", - etype, bthbytes[0], qp, tlen); else if (etype == RCVHQ_RCV_TYPE_EXPECTED) ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", - be32_to_cpu(hdr->bth[0]) & 0xff); + be32_to_cpu(hdr->bth[0]) >> 24); else { /* * error packet, type of error unknown. * Probably type 3, but we don't know, so don't * even try to print the opcode, etc. + * Usually caused by a "bad packet", that has no + * BTH, when the LRH says it should. */ - ipath_dbg("Error Pkt, but no eflags! egrbuf %x, " - "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; " - "hdr %llx %llx %llx %llx %llx\n", - etail, tlen, (unsigned long) rc, l, - (unsigned long long) rc[0], - (unsigned long long) rc[1], - (unsigned long long) rc[2], - (unsigned long long) rc[3], - (unsigned long long) rc[4], - (unsigned long long) rc[5]); + ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" + " %x, len %x hdrq+%x rhf: %Lx\n", + etail, tlen, l, + le64_to_cpu(*(__le64 *) rhf_addr)); + if (ipath_debug & __IPATH_ERRPKTDBG) { + u32 j, *d, dw = rsize-2; + if (rsize > (tlen>>2)) + dw = tlen>>2; + d = (u32 *)hdr; + printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n", + dw); + for (j = 0; j < dw; j++) + printk(KERN_DEBUG "%8x%s", d[j], + (j%8) == 7 ? "\n" : " "); + printk(KERN_DEBUG ".\n"); + } } l += rsize; if (l >= maxcnt) l = 0; - if (etype != RCVHQ_RCV_TYPE_EXPECTED) - updegr = 1; + rhf_addr = (__le32 *) pd->port_rcvhdrq + + l + dd->ipath_rhf_offset; + if (dd->ipath_flags & IPATH_NODMA_RTAIL) { + u32 seq = ipath_hdrget_seq(rhf_addr); + + if (++pd->port_seq_cnt > 13) + pd->port_seq_cnt = 1; + if (seq != pd->port_seq_cnt) + last = 1; + } else if (l == hdrqtail) + last = 1; /* * update head regs on last packet, and every 16 packets. * Reduce bus traffic, while still trying to prevent * rcvhdrq overflows, for when the queue is nearly full */ - if (l == hdrqtail || (i && !(i&0xf))) { - u64 lval; - if (l == hdrqtail) - /* request IBA6120 interrupt only on last */ - lval = dd->ipath_rhdrhead_intr_off | l; - else - lval = l; - (void)ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0); + if (last || !(i & 0xf)) { + u64 lval = l; + + /* request IBA6120 and 7220 interrupt only on last */ + if (last) + lval |= dd->ipath_rhdrhead_intr_off; + ipath_write_ureg(dd, ur_rcvhdrhead, lval, + pd->port_port); if (updegr) { - (void)ipath_write_ureg(dd, ur_rcvegrindexhead, - etail, 0); + ipath_write_ureg(dd, ur_rcvegrindexhead, + etail, pd->port_port); updegr = 0; } } } - if (!dd->ipath_rhdrhead_intr_off && !reloop) { + if (!dd->ipath_rhdrhead_intr_off && !reloop && + !(dd->ipath_flags & IPATH_NODMA_RTAIL)) { /* IBA6110 workaround; we can have a race clearing chip * interrupt with another interrupt about to be delivered, * and can clear it before it is delivered on the GPIO @@ -1301,7 +1372,6 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd) * happens when all buffers are in use, so only cpu overhead, not * latency or bandwidth is affected. */ -#define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL if (!dd->ipath_pioavailregs_dma) { ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); return; @@ -1346,7 +1416,7 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd) piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); else piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); - pchg = _IPATH_ALL_CHECKBITS & + pchg = dd->ipath_pioavailkernel[i] & ~(dd->ipath_pioavailshadow[i] ^ piov); pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { @@ -1397,27 +1467,63 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) return ret; } -/** - * ipath_getpiobuf - find an available pio buffer - * @dd: the infinipath device - * @pbufnum: the buffer number is placed here +/* + * debugging code and stats updates if no pio buffers available. + */ +static noinline void no_pio_bufs(struct ipath_devdata *dd) +{ + unsigned long *shadow = dd->ipath_pioavailshadow; + __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma; + + dd->ipath_upd_pio_shadow = 1; + + /* + * not atomic, but if we lose a stat count in a while, that's OK + */ + ipath_stats.sps_nopiobufs++; + if (!(++dd->ipath_consec_nopiobuf % 100000)) { + ipath_dbg("%u pio sends with no bufavail; dmacopy: " + "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n", + dd->ipath_consec_nopiobuf, + (unsigned long long) le64_to_cpu(dma[0]), + (unsigned long long) le64_to_cpu(dma[1]), + (unsigned long long) le64_to_cpu(dma[2]), + (unsigned long long) le64_to_cpu(dma[3]), + shadow[0], shadow[1], shadow[2], shadow[3]); + /* + * 4 buffers per byte, 4 registers above, cover rest + * below + */ + if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > + (sizeof(shadow[0]) * 4 * 4)) + ipath_dbg("2nd group: dmacopy: %llx %llx " + "%llx %llx; shadow: %lx %lx %lx %lx\n", + (unsigned long long)le64_to_cpu(dma[4]), + (unsigned long long)le64_to_cpu(dma[5]), + (unsigned long long)le64_to_cpu(dma[6]), + (unsigned long long)le64_to_cpu(dma[7]), + shadow[4], shadow[5], shadow[6], + shadow[7]); + } +} + +/* + * common code for normal driver pio buffer allocation, and reserved + * allocation. * * do appropriate marking as busy, etc. * returns buffer number if one found (>=0), negative number is error. - * Used by ipath_layer_send */ -u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) +static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd, + u32 *pbufnum, u32 first, u32 last, u32 firsti) { - int i, j, starti, updated = 0; - unsigned piobcnt, iter; + int i, j, updated = 0; + unsigned piobcnt; unsigned long flags; unsigned long *shadow = dd->ipath_pioavailshadow; u32 __iomem *buf; - piobcnt = (unsigned)(dd->ipath_piobcnt2k - + dd->ipath_piobcnt4k); - starti = dd->ipath_lastport_piobuf; - iter = piobcnt - starti; + piobcnt = last - first; if (dd->ipath_upd_pio_shadow) { /* * Minor optimization. If we had no buffers on last call, @@ -1425,12 +1531,10 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) * if no buffers were updated, to be paranoid */ ipath_update_pio_bufs(dd); - /* we scanned here, don't do it at end of scan */ - updated = 1; - i = starti; + updated++; + i = first; } else - i = dd->ipath_lastpioindex; - + i = firsti; rescan: /* * while test_and_set_bit() is atomic, we do that and then the @@ -1438,104 +1542,141 @@ rescan: * of the remaining armlaunch errors. */ spin_lock_irqsave(&ipath_pioavail_lock, flags); - for (j = 0; j < iter; j++, i++) { - if (i >= piobcnt) - i = starti; - /* - * To avoid bus lock overhead, we first find a candidate - * buffer, then do the test and set, and continue if that - * fails. - */ - if (test_bit((2 * i) + 1, shadow) || - test_and_set_bit((2 * i) + 1, shadow)) + for (j = 0; j < piobcnt; j++, i++) { + if (i >= last) + i = first; + if (__test_and_set_bit((2 * i) + 1, shadow)) continue; /* flip generation bit */ - change_bit(2 * i, shadow); + __change_bit(2 * i, shadow); break; } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); - if (j == iter) { - volatile __le64 *dma = dd->ipath_pioavailregs_dma; - - /* - * first time through; shadow exhausted, but may be real - * buffers available, so go see; if any updated, rescan - * (once) - */ + if (j == piobcnt) { if (!updated) { + /* + * first time through; shadow exhausted, but may be + * buffers available, try an update and then rescan. + */ ipath_update_pio_bufs(dd); - updated = 1; - i = starti; + updated++; + i = first; goto rescan; - } - dd->ipath_upd_pio_shadow = 1; - /* - * not atomic, but if we lose one once in a while, that's OK - */ - ipath_stats.sps_nopiobufs++; - if (!(++dd->ipath_consec_nopiobuf % 100000)) { - ipath_dbg( - "%u pio sends with no bufavail; dmacopy: " - "%llx %llx %llx %llx; shadow: " - "%lx %lx %lx %lx\n", - dd->ipath_consec_nopiobuf, - (unsigned long long) le64_to_cpu(dma[0]), - (unsigned long long) le64_to_cpu(dma[1]), - (unsigned long long) le64_to_cpu(dma[2]), - (unsigned long long) le64_to_cpu(dma[3]), - shadow[0], shadow[1], shadow[2], - shadow[3]); + } else if (updated == 1 && piobcnt <= + ((dd->ipath_sendctrl + >> INFINIPATH_S_UPDTHRESH_SHIFT) & + INFINIPATH_S_UPDTHRESH_MASK)) { /* - * 4 buffers per byte, 4 registers above, cover rest - * below + * for chips supporting and using the update + * threshold we need to force an update of the + * in-memory copy if the count is less than the + * thershold, then check one more time. */ - if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > - (sizeof(shadow[0]) * 4 * 4)) - ipath_dbg("2nd group: dmacopy: %llx %llx " - "%llx %llx; shadow: %lx %lx " - "%lx %lx\n", - (unsigned long long) - le64_to_cpu(dma[4]), - (unsigned long long) - le64_to_cpu(dma[5]), - (unsigned long long) - le64_to_cpu(dma[6]), - (unsigned long long) - le64_to_cpu(dma[7]), - shadow[4], shadow[5], - shadow[6], shadow[7]); + ipath_force_pio_avail_update(dd); + ipath_update_pio_bufs(dd); + updated++; + i = first; + goto rescan; } + + no_pio_bufs(dd); buf = NULL; - goto bail; + } else { + if (i < dd->ipath_piobcnt2k) + buf = (u32 __iomem *) (dd->ipath_pio2kbase + + i * dd->ipath_palign); + else + buf = (u32 __iomem *) + (dd->ipath_pio4kbase + + (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); + if (pbufnum) + *pbufnum = i; } - /* - * set next starting place. Since it's just an optimization, - * it doesn't matter who wins on this, so no locking - */ - dd->ipath_lastpioindex = i + 1; - if (dd->ipath_upd_pio_shadow) - dd->ipath_upd_pio_shadow = 0; - if (dd->ipath_consec_nopiobuf) - dd->ipath_consec_nopiobuf = 0; - if (i < dd->ipath_piobcnt2k) - buf = (u32 __iomem *) (dd->ipath_pio2kbase + - i * dd->ipath_palign); - else - buf = (u32 __iomem *) - (dd->ipath_pio4kbase + - (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); - ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", - i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf); - if (pbufnum) - *pbufnum = i; + return buf; +} -bail: +/** + * ipath_getpiobuf - find an available pio buffer + * @dd: the infinipath device + * @plen: the size of the PIO buffer needed in 32-bit words + * @pbufnum: the buffer number is placed here + */ +u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum) +{ + u32 __iomem *buf; + u32 pnum, nbufs; + u32 first, lasti; + + if (plen + 1 >= IPATH_SMALLBUF_DWORDS) { + first = dd->ipath_piobcnt2k; + lasti = dd->ipath_lastpioindexl; + } else { + first = 0; + lasti = dd->ipath_lastpioindex; + } + nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti); + + if (buf) { + /* + * Set next starting place. It's just an optimization, + * it doesn't matter who wins on this, so no locking + */ + if (plen + 1 >= IPATH_SMALLBUF_DWORDS) + dd->ipath_lastpioindexl = pnum + 1; + else + dd->ipath_lastpioindex = pnum + 1; + if (dd->ipath_upd_pio_shadow) + dd->ipath_upd_pio_shadow = 0; + if (dd->ipath_consec_nopiobuf) + dd->ipath_consec_nopiobuf = 0; + ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", + pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf); + if (pbufnum) + *pbufnum = pnum; + + } return buf; } /** + * ipath_chg_pioavailkernel - change which send buffers are available for kernel + * @dd: the infinipath device + * @start: the starting send buffer number + * @len: the number of send buffers + * @avail: true if the buffers are available for kernel use, false otherwise + */ +void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, + unsigned len, int avail) +{ + unsigned long flags; + unsigned end; + + /* There are two bits per send buffer (busy and generation) */ + start *= 2; + len *= 2; + end = start + len; + + /* Set or clear the generation bits. */ + spin_lock_irqsave(&ipath_pioavail_lock, flags); + while (start < end) { + if (avail) { + __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, + dd->ipath_pioavailshadow); + __set_bit(start, dd->ipath_pioavailkernel); + } else { + __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, + dd->ipath_pioavailshadow); + __clear_bit(start, dd->ipath_pioavailkernel); + } + start += 2; + } + spin_unlock_irqrestore(&ipath_pioavail_lock, flags); +} + +/** * ipath_create_rcvhdrq - create a receive header queue * @dd: the infinipath device * @pd: the port data @@ -1566,19 +1707,27 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, ret = -ENOMEM; goto bail; } - pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL); - if (!pd->port_rcvhdrtail_kvaddr) { - ipath_dev_err(dd, "attempt to allocate 1 page " - "for port %u rcvhdrqtailaddr failed\n", - pd->port_port); - ret = -ENOMEM; - dma_free_coherent(&dd->pcidev->dev, amt, - pd->port_rcvhdrq, pd->port_rcvhdrq_phys); - pd->port_rcvhdrq = NULL; - goto bail; + + if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { + pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent( + &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + GFP_KERNEL); + if (!pd->port_rcvhdrtail_kvaddr) { + ipath_dev_err(dd, "attempt to allocate 1 page " + "for port %u rcvhdrqtailaddr " + "failed\n", pd->port_port); + ret = -ENOMEM; + dma_free_coherent(&dd->pcidev->dev, amt, + pd->port_rcvhdrq, + pd->port_rcvhdrq_phys); + pd->port_rcvhdrq = NULL; + goto bail; + } + pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; + ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx " + "physical\n", pd->port_port, + (unsigned long long) phys_hdrqtail); } - pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; pd->port_rcvhdrq_size = amt; @@ -1588,10 +1737,6 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, (unsigned long) pd->port_rcvhdrq_phys, (unsigned long) pd->port_rcvhdrq_size, pd->port_port); - - ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n", - pd->port_port, - (unsigned long long) phys_hdrqtail); } else ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " @@ -1615,7 +1760,6 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, pd->port_port, pd->port_rcvhdrq_phys); - ret = 0; bail: return ret; } @@ -1632,52 +1776,149 @@ bail: */ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) { + unsigned long flags; + + if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { + ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); + goto bail; + } + /* + * If we have SDMA, and it's not disabled, we have to kick off the + * abort state machine, provided we aren't already aborting. + * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), + * we skip the rest of this routine. It is already "in progress" + */ + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { + int skip_cancel; + u64 *statp = &dd->ipath_sdma_status; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + skip_cancel = + !test_bit(IPATH_SDMA_DISABLED, statp) && + test_and_set_bit(IPATH_SDMA_ABORTING, statp); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (skip_cancel) + goto bail; + } + ipath_dbg("Cancelling all in-progress send buffers\n"); - dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */ + + /* skip armlaunch errs for a while */ + dd->ipath_lastcancel = jiffies + HZ / 2; + /* - * the abort bit is auto-clearing. We read scratch to be sure - * that cancels and the abort have taken effect in the chip. + * The abort bit is auto-clearing. We also don't want pioavail + * update happening during this, and we don't want any other + * sends going out, so turn those off for the duration. We read + * the scratch register to be sure that cancels and the abort + * have taken effect in the chip. Otherwise two parts are same + * as ipath_force_pio_avail_update() */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD + | INFINIPATH_S_PIOENABLE); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); + dd->ipath_sendctrl | INFINIPATH_S_ABORT); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* disarm all send buffers */ ipath_disarm_piobufs(dd, 0, - (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k)); - if (restore_sendctrl) /* else done by caller later */ + dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + + if (restore_sendctrl) { + /* else done by caller later if needed */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD | + INFINIPATH_S_PIOENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); + dd->ipath_sendctrl); + /* and again, be sure all have hit the chip */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } - /* and again, be sure all have hit the chip */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) && + !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && + test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + /* only wait so long for intr */ + dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; + dd->ipath_sdma_reset_wait = 200; + __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + } +bail:; } +/* + * Force an update of in-memory copy of the pioavail registers, when + * needed for any of a variety of reasons. We read the scratch register + * to make it highly likely that the update will have happened by the + * time we return. If already off (as in cancel_sends above), this + * routine is a nop, on the assumption that the caller will "do the + * right thing". + */ +void ipath_force_pio_avail_update(struct ipath_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + } + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); +} -static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) +static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, + int linitcmd) { + u64 mod_wd; static const char *what[4] = { [0] = "NOP", [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN", [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" }; - int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & - INFINIPATH_IBCC_LINKCMD_MASK; - - ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate " - "is %s\n", dd->ipath_unit, - what[linkcmd], - ipath_ibcstatus_str[ - (ipath_read_kreg64 - (dd, dd->ipath_kregs->kr_ibcstatus) >> - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); - /* flush all queued sends when going to DOWN to be sure that - * they don't block MAD packets */ - if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN) - ipath_cancel_sends(dd, 1); + + if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + */ + preempt_disable(); + dd->ipath_flags |= IPATH_IB_LINK_DISABLED; + preempt_enable(); + } else if (linitcmd) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + preempt_disable(); + dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; + preempt_enable(); + } + + mod_wd = (linkcmd << dd->ibcc_lc_shift) | + (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT); + ipath_cdbg(VERBOSE, + "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n", + dd->ipath_unit, what[linkcmd], linitcmd, + ipath_ibcstatus_str[ipath_ib_linktrstate(dd, + ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl | which); + dd->ipath_ibcctrl | mod_wd); + /* read from chip so write is flushed */ + (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); } int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) @@ -1687,30 +1928,28 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) switch (newstate) { case IPATH_IB_LINKDOWN_ONLY: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN << - INFINIPATH_IBCC_LINKCMD_SHIFT); + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0); /* don't wait */ ret = 0; goto bail; case IPATH_IB_LINKDOWN: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, + INFINIPATH_IBCC_LINKINITCMD_POLL); /* don't wait */ ret = 0; goto bail; case IPATH_IB_LINKDOWN_SLEEP: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, + INFINIPATH_IBCC_LINKINITCMD_SLEEP); /* don't wait */ ret = 0; goto bail; case IPATH_IB_LINKDOWN_DISABLE: - ipath_set_ib_lstate(dd, - INFINIPATH_IBCC_LINKINITCMD_DISABLE << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, + INFINIPATH_IBCC_LINKINITCMD_DISABLE); /* don't wait */ ret = 0; goto bail; @@ -1725,8 +1964,8 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) ret = -EINVAL; goto bail; } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << - INFINIPATH_IBCC_LINKCMD_SHIFT); + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0); + /* * Since the port can transition to ACTIVE by receiving * a non VL 15 packet, wait for either state. @@ -1743,8 +1982,7 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) ret = -EINVAL; goto bail; } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << - INFINIPATH_IBCC_LINKCMD_SHIFT); + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0); lstate = IPATH_LINKACTIVE; break; @@ -1753,16 +1991,41 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + + /* turn heartbeat off, as it causes loopback to fail */ + dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_OFF); + /* don't wait */ ret = 0; - goto bail; // no state change to wait for + goto bail; case IPATH_IB_LINK_EXTERNAL: - dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n"); + dev_info(&dd->pcidev->dev, + "Disabling IB local loopback (normal)\n"); + dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_ON); dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + /* don't wait */ ret = 0; - goto bail; // no state change to wait for + goto bail; + + /* + * Heartbeat can be explicitly enabled by the user via + * "hrtbt_enable" "file", and if disabled, trying to enable here + * will have no effect. Implicit changes (heartbeat off when + * loopback on, and vice versa) are included to ease testing. + */ + case IPATH_IB_LINK_HRTBT: + ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_ON); + goto bail; + + case IPATH_IB_LINK_NO_HRTBT: + ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_OFF); + goto bail; default: ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); @@ -1785,7 +2048,7 @@ bail: * sanity checking on this, and we don't deal with what happens to * programs that are already running when the size changes. * NOTE: changing the MTU will usually cause the IBC to go back to - * link initialize (IPATH_IBSTATE_INIT) state... + * link INIT state... */ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) { @@ -1800,7 +2063,7 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) * piosize). We check that it's one of the valid IB sizes. */ if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && - arg != 4096) { + (arg != 4096 || !ipath_mtu4096)) { ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); ret = -EINVAL; goto bail; @@ -1816,6 +2079,8 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { /* Only if it's not the initial value (or reset to it) */ if (piosize != dd->ipath_init_ibmaxlen) { + if (arg > piosize && arg <= dd->ipath_init_ibmaxlen) + piosize = dd->ipath_init_ibmaxlen; dd->ipath_ibmaxlen = piosize; changed = 1; } @@ -1829,24 +2094,17 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) } if (changed) { + u64 ibc = dd->ipath_ibcctrl, ibdw; /* - * set the IBC maxpktlength to the size of our pio - * buffers in words + * update our housekeeping variables, and set IBC max + * size, same as init code; max IBC is max we allow in + * buffer, less the qword pbc, plus 1 for ICRC, in dwords */ - u64 ibc = dd->ipath_ibcctrl; + dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32); + ibdw = (dd->ipath_ibmaxlen >> 2) + 1; ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << - INFINIPATH_IBCC_MAXPKTLEN_SHIFT); - - piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ - dd->ipath_ibmaxlen = piosize; - piosize /= sizeof(u32); /* in words */ - /* - * for ICRC, which we only send in diag test pkt mode, and - * we don't need to worry about that for mtu - */ - piosize += 1; - - ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + dd->ibcc_mpl_shift); + ibc |= ibdw << dd->ibcc_mpl_shift; dd->ipath_ibcctrl = ibc; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); @@ -1859,11 +2117,16 @@ bail: return ret; } -int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) +int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc) { - dd->ipath_lid = arg; + dd->ipath_lid = lid; dd->ipath_lmc = lmc; + dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid | + (~((1U << lmc) - 1)) << 16); + + dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid); + return 0; } @@ -1925,10 +2188,8 @@ static void ipath_run_led_override(unsigned long opaque) * but leave that to per-chip functions. */ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; - lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & - INFINIPATH_IBCS_LINKSTATE_MASK; + ltstate = ipath_ib_linktrstate(dd, val); + lstate = ipath_ib_linkstate(dd, val); dd->ipath_f_setextled(dd, lstate, ltstate); mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); @@ -1969,9 +2230,8 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) dd->ipath_led_override_timer.data = (unsigned long) dd; dd->ipath_led_override_timer.expires = jiffies + 1; add_timer(&dd->ipath_led_override_timer); - } else { + } else atomic_dec(&dd->ipath_led_override_timer_active); - } } /** @@ -1989,6 +2249,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_dbg("Shutting down the device\n"); + ipath_hol_up(dd); /* make sure user processes aren't suspended */ + dd->ipath_flags |= IPATH_LINKUNK; dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | IPATH_LINKINIT | IPATH_LINKARMED | @@ -2003,6 +2265,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + /* * gracefully stop all sends allowing any in progress to trickle out * first. @@ -2020,10 +2285,16 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ udelay(5); - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); + dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */ + + ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE); ipath_cancel_sends(dd, 0); + /* + * we are shutting down, so tell components that care. We don't do + * this on just a link state change, much like ethernet, a cable + * unplug, etc. doesn't change driver state + */ signal_ib_event(dd, IB_EVENT_PORT_ERR); /* disable IBC */ @@ -2038,10 +2309,20 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ dd->ipath_f_quiet_serdes(dd); + /* stop all the timers that might still be running */ + del_timer_sync(&dd->ipath_hol_timer); if (dd->ipath_stats_timer_active) { del_timer_sync(&dd->ipath_stats_timer); dd->ipath_stats_timer_active = 0; } + if (dd->ipath_intrchk_timer.data) { + del_timer_sync(&dd->ipath_intrchk_timer); + dd->ipath_intrchk_timer.data = 0; + } + if (atomic_read(&dd->ipath_led_override_timer_active)) { + del_timer_sync(&dd->ipath_led_override_timer); + atomic_set(&dd->ipath_led_override_timer_active, 0); + } /* * clear all interrupts and errors, so that the next time the driver @@ -2115,13 +2396,13 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) ipath_cdbg(VERBOSE, "free closed port %d " "ipath_port0_skbinfo @ %p\n", pd->port_port, skbinfo); - for (e = 0; e < dd->ipath_rcvegrcnt; e++) - if (skbinfo[e].skb) { - pci_unmap_single(dd->pcidev, skbinfo[e].phys, - dd->ipath_ibmaxlen, - PCI_DMA_FROMDEVICE); - dev_kfree_skb(skbinfo[e].skb); - } + for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) + if (skbinfo[e].skb) { + pci_unmap_single(dd->pcidev, skbinfo[e].phys, + dd->ipath_ibmaxlen, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(skbinfo[e].skb); + } vfree(skbinfo); } kfree(pd->port_tid_pg_list); @@ -2144,6 +2425,7 @@ static int __init infinipath_init(void) */ idr_init(&unit_table); if (!idr_pre_get(&unit_table, GFP_KERNEL)) { + printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); ret = -ENOMEM; goto bail; } @@ -2235,13 +2517,18 @@ int ipath_reset_device(int unit) } } + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + dd->ipath_flags &= ~IPATH_INITTED; + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); ret = dd->ipath_f_reset(dd); - if (ret != 1) - ipath_dbg("reset was not successful\n"); - ipath_dbg("Trying to reinitialize unit %u after reset attempt\n", - unit); - ret = ipath_init_chip(dd, 1); + if (ret == 1) { + ipath_dbg("Reinitializing unit %u after reset attempt\n", + unit); + ret = ipath_init_chip(dd, 1); + } else + ret = -EAGAIN; if (ret) ipath_dev_err(dd, "Reinitialize unit %u after " "reset failed with %d\n", unit, ret); @@ -2253,13 +2540,121 @@ bail: return ret; } +/* + * send a signal to all the processes that have the driver open + * through the normal interfaces (i.e., everything other than diags + * interface). Returns number of signalled processes. + */ +static int ipath_signal_procs(struct ipath_devdata *dd, int sig) +{ + int i, sub, any = 0; + pid_t pid; + + if (!dd->ipath_pd) + return 0; + for (i = 1; i < dd->ipath_cfgports; i++) { + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || + !dd->ipath_pd[i]->port_pid) + continue; + pid = dd->ipath_pd[i]->port_pid; + dev_info(&dd->pcidev->dev, "context %d in use " + "(PID %u), sending signal %d\n", + i, pid, sig); + kill_proc(pid, sig, 1); + any++; + for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { + pid = dd->ipath_pd[i]->port_subpid[sub]; + if (!pid) + continue; + dev_info(&dd->pcidev->dev, "sub-context " + "%d:%d in use (PID %u), sending " + "signal %d\n", i, sub, pid, sig); + kill_proc(pid, sig, 1); + any++; + } + } + return any; +} + +static void ipath_hol_signal_down(struct ipath_devdata *dd) +{ + if (ipath_signal_procs(dd, SIGSTOP)) + ipath_dbg("Stopped some processes\n"); + ipath_cancel_sends(dd, 1); +} + + +static void ipath_hol_signal_up(struct ipath_devdata *dd) +{ + if (ipath_signal_procs(dd, SIGCONT)) + ipath_dbg("Continued some processes\n"); +} + +/* + * link is down, stop any users processes, and flush pending sends + * to prevent HoL blocking, then start the HoL timer that + * periodically continues, then stop procs, so they can detect + * link down if they want, and do something about it. + * Timer may already be running, so use __mod_timer, not add_timer. + */ +void ipath_hol_down(struct ipath_devdata *dd) +{ + dd->ipath_hol_state = IPATH_HOL_DOWN; + ipath_hol_signal_down(dd); + dd->ipath_hol_next = IPATH_HOL_DOWNCONT; + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); + __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); +} + +/* + * link is up, continue any user processes, and ensure timer + * is a nop, if running. Let timer keep running, if set; it + * will nop when it sees the link is up + */ +void ipath_hol_up(struct ipath_devdata *dd) +{ + ipath_hol_signal_up(dd); + dd->ipath_hol_state = IPATH_HOL_UP; +} + +/* + * toggle the running/not running state of user proceses + * to prevent HoL blocking on chip resources, but still allow + * user processes to do link down special case handling. + * Should only be called via the timer + */ +void ipath_hol_event(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *)opaque; + + if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP + && dd->ipath_hol_state != IPATH_HOL_UP) { + dd->ipath_hol_next = IPATH_HOL_DOWNCONT; + ipath_dbg("Stopping processes\n"); + ipath_hol_signal_down(dd); + } else { /* may do "extra" if also in ipath_hol_up() */ + dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; + ipath_dbg("Continuing processes\n"); + ipath_hol_signal_up(dd); + } + if (dd->ipath_hol_state == IPATH_HOL_UP) + ipath_dbg("link's up, don't resched timer\n"); + else { + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); + __mod_timer(&dd->ipath_hol_timer, + dd->ipath_hol_timer.expires); + } +} + int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) { u64 val; - if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) { + + if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) return -1; - } - if ( dd->ipath_rx_pol_inv != new_pol_inv ) { + if (dd->ipath_rx_pol_inv != new_pol_inv) { dd->ipath_rx_pol_inv = new_pol_inv; val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); val &= ~(INFINIPATH_XGXS_RX_POL_MASK << |