diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2015-11-04 01:45:16 +0300 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2015-11-04 01:45:16 +0300 |
commit | 84a73014d86fd660822a20c032625e3afe99ca58 (patch) | |
tree | 9e42a2d3c40730947ae18363d00e8a0ee2743792 /drivers/ntb | |
parent | 195562194aad3a0a3915941077f283bcc6347b9b (diff) | |
parent | bf5f18d708802737fa0db6306f6b9148f85b2efd (diff) | |
download | linux-84a73014d86fd660822a20c032625e3afe99ca58.tar.xz |
Merge branch 'next' into for-linus
Prepare first round of input updates for 4.3 merge window.
Diffstat (limited to 'drivers/ntb')
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_intel.c | 39 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_intel.h | 3 | ||||
-rw-r--r-- | drivers/ntb/ntb.c | 2 | ||||
-rw-r--r-- | drivers/ntb/ntb_transport.c | 325 |
4 files changed, 250 insertions, 119 deletions
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 87751cfd6f4f..865a3e3cc581 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -190,14 +190,17 @@ static inline int pdev_is_xeon(struct pci_dev *pdev) case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: return 1; } return 0; @@ -237,7 +240,7 @@ static inline int ndev_ignore_unsafe(struct intel_ntb_dev *ndev, static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx) { - if (idx < 0 || idx > ndev->mw_count) + if (idx < 0 || idx >= ndev->mw_count) return -EINVAL; return ndev->reg->mw_bar[idx]; } @@ -572,10 +575,13 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "Connection Topology -\t%s\n", ntb_topo_string(ndev->ntb.topo)); - off += scnprintf(buf + off, buf_size - off, - "B2B Offset -\t\t%#lx\n", ndev->b2b_off); - off += scnprintf(buf + off, buf_size - off, - "B2B MW Idx -\t\t%d\n", ndev->b2b_idx); + if (ndev->b2b_idx != UINT_MAX) { + off += scnprintf(buf + off, buf_size - off, + "B2B MW Idx -\t\t%u\n", ndev->b2b_idx); + off += scnprintf(buf + off, buf_size - off, + "B2B Offset -\t\t%#lx\n", ndev->b2b_off); + } + off += scnprintf(buf + off, buf_size - off, "BAR4 Split -\t\t%s\n", ndev->bar4_split ? "yes" : "no"); @@ -1484,7 +1490,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, pdev = ndev_pdev(ndev); mmio = ndev->self_mmio; - if (ndev->b2b_idx >= ndev->mw_count) { + if (ndev->b2b_idx == UINT_MAX) { dev_dbg(ndev_dev(ndev), "not using b2b mw\n"); b2b_bar = 0; ndev->b2b_off = 0; @@ -1776,6 +1782,13 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev) else ndev->b2b_idx = b2b_mw_idx; + if (ndev->b2b_idx >= ndev->mw_count) { + dev_dbg(ndev_dev(ndev), + "b2b_mw_idx %d invalid for mw_count %u\n", + b2b_mw_idx, ndev->mw_count); + return -EINVAL; + } + dev_dbg(ndev_dev(ndev), "setting up b2b mw idx %d means %d\n", b2b_mw_idx, ndev->b2b_idx); @@ -1843,6 +1856,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; break; } @@ -1857,6 +1873,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; break; } @@ -1878,6 +1897,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; break; } @@ -1996,7 +2018,7 @@ static inline void ndev_init_struct(struct intel_ntb_dev *ndev, ndev->ntb.ops = &intel_ntb_ops; ndev->b2b_off = 0; - ndev->b2b_idx = INT_MAX; + ndev->b2b_idx = UINT_MAX; ndev->bar4_split = 0; @@ -2234,14 +2256,17 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BDX)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_BDX)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)}, {0} }; MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index 7ddaf387b679..ea0612f797df 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -67,6 +67,9 @@ #define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E #define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F #define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E +#define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX 0x6F0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F /* Intel Xeon hardware */ diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c index 23435f2a5486..2e2530743831 100644 --- a/drivers/ntb/ntb.c +++ b/drivers/ntb/ntb.c @@ -114,7 +114,7 @@ int ntb_register_device(struct ntb_dev *ntb) ntb->dev.bus = &ntb_bus; ntb->dev.parent = &ntb->pdev->dev; ntb->dev.release = ntb_dev_release; - dev_set_name(&ntb->dev, pci_name(ntb->pdev)); + dev_set_name(&ntb->dev, "%s", pci_name(ntb->pdev)); ntb->ctx = NULL; ntb->ctx_ops = NULL; diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index efe3ad4122f2..6e3ee907d186 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -119,7 +119,8 @@ struct ntb_transport_qp { struct ntb_transport_ctx *transport; struct ntb_dev *ndev; void *cb_data; - struct dma_chan *dma_chan; + struct dma_chan *tx_dma_chan; + struct dma_chan *rx_dma_chan; bool client_ready; bool link_is_up; @@ -142,10 +143,11 @@ struct ntb_transport_qp { void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); + struct list_head rx_post_q; struct list_head rx_pend_q; struct list_head rx_free_q; - spinlock_t ntb_rx_pend_q_lock; - spinlock_t ntb_rx_free_q_lock; + /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */ + spinlock_t ntb_rx_q_lock; void *rx_buff; unsigned int rx_index; unsigned int rx_max_entry; @@ -211,6 +213,8 @@ struct ntb_transport_ctx { bool link_is_up; struct delayed_work link_work; struct work_struct link_cleanup; + + struct dentry *debugfs_node_dir; }; enum { @@ -294,7 +298,7 @@ static LIST_HEAD(ntb_transport_list); static int ntb_bus_init(struct ntb_transport_ctx *nt) { - list_add(&nt->entry, &ntb_transport_list); + list_add_tail(&nt->entry, &ntb_transport_list); return 0; } @@ -436,16 +440,20 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, char *buf; ssize_t ret, out_offset, out_count; + qp = filp->private_data; + + if (!qp || !qp->link_is_up) + return 0; + out_count = 1000; buf = kmalloc(out_count, GFP_KERNEL); if (!buf) return -ENOMEM; - qp = filp->private_data; out_offset = 0; out_offset += snprintf(buf + out_offset, out_count - out_offset, - "NTB QP stats\n"); + "\nNTB QP stats:\n\n"); out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_bytes - \t%llu\n", qp->rx_bytes); out_offset += snprintf(buf + out_offset, out_count - out_offset, @@ -463,11 +471,11 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_err_ver - \t%llu\n", qp->rx_err_ver); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_buff - \t%p\n", qp->rx_buff); + "rx_buff - \t0x%p\n", qp->rx_buff); out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", qp->rx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n", qp->rx_max_entry); + "rx_max_entry - \t%u\n\n", qp->rx_max_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); @@ -482,15 +490,32 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_mw - \t%p\n", qp->tx_mw); + "tx_mw - \t0x%p\n", qp->tx_mw); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "tx_index (H) - \t%u\n", qp->tx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_index - \t%u\n", qp->tx_index); + "RRI (T) - \t%u\n", + qp->remote_rx_info->entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_max_entry - \t%u\n", qp->tx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "free tx - \t%u\n", + ntb_transport_tx_free_entry(qp)); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "\nQP Link %s\n", + "\n"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Using TX DMA - \t%s\n", + qp->tx_dma_chan ? "Yes" : "No"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Using RX DMA - \t%s\n", + qp->rx_dma_chan ? "Yes" : "No"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "\n"); + if (out_offset > out_count) out_offset = out_count; @@ -528,12 +553,34 @@ static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock, } entry = list_first_entry(list, struct ntb_queue_entry, entry); list_del(&entry->entry); + out: spin_unlock_irqrestore(lock, flags); return entry; } +static struct ntb_queue_entry *ntb_list_mv(spinlock_t *lock, + struct list_head *list, + struct list_head *to_list) +{ + struct ntb_queue_entry *entry; + unsigned long flags; + + spin_lock_irqsave(lock, flags); + + if (list_empty(list)) { + entry = NULL; + } else { + entry = list_first_entry(list, struct ntb_queue_entry, entry); + list_move_tail(&entry->entry, to_list); + } + + spin_unlock_irqrestore(lock, flags); + + return entry; +} + static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num) { @@ -601,13 +648,16 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) } static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, - unsigned int size) + resource_size_t size) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; struct pci_dev *pdev = nt->ndev->pdev; - unsigned int xlat_size, buff_size; + size_t xlat_size, buff_size; int rc; + if (!size) + return -EINVAL; + xlat_size = round_up(size, mw->xlat_align_size); buff_size = round_up(size, mw->xlat_align); @@ -627,7 +677,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, if (!mw->virt_addr) { mw->xlat_size = 0; mw->buff_size = 0; - dev_err(&pdev->dev, "Unable to alloc MW buff of size %d\n", + dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n", buff_size); return -ENOMEM; } @@ -867,6 +917,8 @@ static void ntb_qp_link_work(struct work_struct *work) if (qp->event_handler) qp->event_handler(qp->cb_data, qp->link_is_up); + + tasklet_schedule(&qp->rxc_db_work); } else if (nt->link_is_up) schedule_delayed_work(&qp->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); @@ -923,12 +975,12 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, qp->tx_max_frame = min(transport_mtu, tx_size / 2); qp->tx_max_entry = tx_size / qp->tx_max_frame; - if (nt_debugfs_dir) { + if (nt->debugfs_node_dir) { char debugfs_name[4]; snprintf(debugfs_name, 4, "qp%d", qp_num); qp->debugfs_dir = debugfs_create_dir(debugfs_name, - nt_debugfs_dir); + nt->debugfs_node_dir); qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR, qp->debugfs_dir, qp, @@ -941,10 +993,10 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work); INIT_WORK(&qp->link_cleanup, ntb_qp_link_cleanup_work); - spin_lock_init(&qp->ntb_rx_pend_q_lock); - spin_lock_init(&qp->ntb_rx_free_q_lock); + spin_lock_init(&qp->ntb_rx_q_lock); spin_lock_init(&qp->ntb_tx_free_q_lock); + INIT_LIST_HEAD(&qp->rx_post_q); INIT_LIST_HEAD(&qp->rx_pend_q); INIT_LIST_HEAD(&qp->rx_free_q); INIT_LIST_HEAD(&qp->tx_free_q); @@ -1031,6 +1083,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) goto err2; } + if (nt_debugfs_dir) { + nt->debugfs_node_dir = + debugfs_create_dir(pci_name(ndev->pdev), + nt_debugfs_dir); + } + for (i = 0; i < qp_count; i++) { rc = ntb_transport_init_queue(nt, i); if (rc) @@ -1107,22 +1165,47 @@ static void ntb_transport_free(struct ntb_client *self, struct ntb_dev *ndev) kfree(nt); } -static void ntb_rx_copy_callback(void *data) +static void ntb_complete_rxc(struct ntb_transport_qp *qp) { - struct ntb_queue_entry *entry = data; - struct ntb_transport_qp *qp = entry->qp; - void *cb_data = entry->cb_data; - unsigned int len = entry->len; - struct ntb_payload_header *hdr = entry->rx_hdr; + struct ntb_queue_entry *entry; + void *cb_data; + unsigned int len; + unsigned long irqflags; + + spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags); + + while (!list_empty(&qp->rx_post_q)) { + entry = list_first_entry(&qp->rx_post_q, + struct ntb_queue_entry, entry); + if (!(entry->flags & DESC_DONE_FLAG)) + break; + + entry->rx_hdr->flags = 0; + iowrite32(entry->index, &qp->rx_info->entry); + + cb_data = entry->cb_data; + len = entry->len; - hdr->flags = 0; + list_move_tail(&entry->entry, &qp->rx_free_q); + + spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags); + + if (qp->rx_handler && qp->client_ready) + qp->rx_handler(qp, qp->cb_data, cb_data, len); + + spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags); + } + + spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags); +} - iowrite32(entry->index, &qp->rx_info->entry); +static void ntb_rx_copy_callback(void *data) +{ + struct ntb_queue_entry *entry = data; - ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); + entry->flags |= DESC_DONE_FLAG; - if (qp->rx_handler && qp->client_ready) - qp->rx_handler(qp, qp->cb_data, cb_data, len); + ntb_complete_rxc(entry->qp); } static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) @@ -1138,36 +1221,35 @@ static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) ntb_rx_copy_callback(entry); } -static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, - size_t len) +static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) { struct dma_async_tx_descriptor *txd; struct ntb_transport_qp *qp = entry->qp; - struct dma_chan *chan = qp->dma_chan; + struct dma_chan *chan = qp->rx_dma_chan; struct dma_device *device; - size_t pay_off, buff_off; + size_t pay_off, buff_off, len; struct dmaengine_unmap_data *unmap; dma_cookie_t cookie; void *buf = entry->buf; - entry->len = len; + len = entry->len; if (!chan) goto err; if (len < copy_bytes) - goto err_wait; + goto err; device = chan->device; pay_off = (size_t)offset & ~PAGE_MASK; buff_off = (size_t)buf & ~PAGE_MASK; if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) - goto err_wait; + goto err; unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT); if (!unmap) - goto err_wait; + goto err; unmap->len = len; unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset), @@ -1210,12 +1292,6 @@ err_set_unmap: dmaengine_unmap_put(unmap); err_get_unmap: dmaengine_unmap_put(unmap); -err_wait: - /* If the callbacks come out of order, the writing of the index to the - * last completed will be out of order. This may result in the - * receive stalling forever. - */ - dma_sync_wait(chan, qp->last_cookie); err: ntb_memcpy_rx(entry, offset); qp->rx_memcpy++; @@ -1226,7 +1302,6 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) struct ntb_payload_header *hdr; struct ntb_queue_entry *entry; void *offset; - int rc; offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; hdr = offset + qp->rx_max_frame - sizeof(struct ntb_payload_header); @@ -1255,65 +1330,43 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) return -EIO; } - entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q); if (!entry) { dev_dbg(&qp->ndev->pdev->dev, "no receive buffer\n"); qp->rx_err_no_buf++; - - rc = -ENOMEM; - goto err; + return -EAGAIN; } + entry->rx_hdr = hdr; + entry->index = qp->rx_index; + if (hdr->len > entry->len) { dev_dbg(&qp->ndev->pdev->dev, "receive buffer overflow! Wanted %d got %d\n", hdr->len, entry->len); qp->rx_err_oflow++; - rc = -EIO; - goto err; - } + entry->len = -EIO; + entry->flags |= DESC_DONE_FLAG; - dev_dbg(&qp->ndev->pdev->dev, - "RX OK index %u ver %u size %d into buf size %d\n", - qp->rx_index, hdr->ver, hdr->len, entry->len); + ntb_complete_rxc(qp); + } else { + dev_dbg(&qp->ndev->pdev->dev, + "RX OK index %u ver %u size %d into buf size %d\n", + qp->rx_index, hdr->ver, hdr->len, entry->len); - qp->rx_bytes += hdr->len; - qp->rx_pkts++; + qp->rx_bytes += hdr->len; + qp->rx_pkts++; - entry->index = qp->rx_index; - entry->rx_hdr = hdr; + entry->len = hdr->len; - ntb_async_rx(entry, offset, hdr->len); + ntb_async_rx(entry, offset); + } qp->rx_index++; qp->rx_index %= qp->rx_max_entry; return 0; - -err: - /* FIXME: if this syncrhonous update of the rx_index gets ahead of - * asyncrhonous ntb_rx_copy_callback of previous entry, there are three - * scenarios: - * - * 1) The peer might miss this update, but observe the update - * from the memcpy completion callback. In this case, the buffer will - * not be freed on the peer to be reused for a different packet. The - * successful rx of a later packet would clear the condition, but the - * condition could persist if several rx fail in a row. - * - * 2) The peer may observe this update before the asyncrhonous copy of - * prior packets is completed. The peer may overwrite the buffers of - * the prior packets before they are copied. - * - * 3) Both: the peer may observe the update, and then observe the index - * decrement by the asynchronous completion callback. Who knows what - * badness that will cause. - */ - hdr->flags = 0; - iowrite32(qp->rx_index, &qp->rx_info->entry); - - return rc; } static void ntb_transport_rxc_db(unsigned long data) @@ -1333,8 +1386,8 @@ static void ntb_transport_rxc_db(unsigned long data) break; } - if (qp->dma_chan) - dma_async_issue_pending(qp->dma_chan); + if (i && qp->rx_dma_chan) + dma_async_issue_pending(qp->rx_dma_chan); if (i == qp->rx_max_entry) { /* there is more work to do */ @@ -1401,7 +1454,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, { struct ntb_payload_header __iomem *hdr; struct dma_async_tx_descriptor *txd; - struct dma_chan *chan = qp->dma_chan; + struct dma_chan *chan = qp->tx_dma_chan; struct dma_device *device; size_t dest_off, buff_off; struct dmaengine_unmap_data *unmap; @@ -1594,14 +1647,27 @@ ntb_transport_create_queue(void *data, struct device *client_dev, dma_cap_set(DMA_MEMCPY, dma_mask); if (use_dma) { - qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, - (void *)(unsigned long)node); - if (!qp->dma_chan) - dev_info(&pdev->dev, "Unable to allocate DMA channel\n"); + qp->tx_dma_chan = + dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->tx_dma_chan) + dev_info(&pdev->dev, "Unable to allocate TX DMA channel\n"); + + qp->rx_dma_chan = + dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->rx_dma_chan) + dev_info(&pdev->dev, "Unable to allocate RX DMA channel\n"); } else { - qp->dma_chan = NULL; + qp->tx_dma_chan = NULL; + qp->rx_dma_chan = NULL; } - dev_dbg(&pdev->dev, "Using %s memcpy\n", qp->dma_chan ? "DMA" : "CPU"); + + dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", + qp->tx_dma_chan ? "DMA" : "CPU"); + + dev_dbg(&pdev->dev, "Using %s memcpy for RX\n", + qp->rx_dma_chan ? "DMA" : "CPU"); for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); @@ -1609,7 +1675,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, goto err1; entry->qp = qp; - ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); } @@ -1634,10 +1700,12 @@ err2: while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); err1: - while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); - if (qp->dma_chan) - dma_release_channel(qp->dma_chan); + if (qp->tx_dma_chan) + dma_release_channel(qp->tx_dma_chan); + if (qp->rx_dma_chan) + dma_release_channel(qp->rx_dma_chan); nt->qp_bitmap_free |= qp_bit; err: return NULL; @@ -1652,7 +1720,6 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue); */ void ntb_transport_free_queue(struct ntb_transport_qp *qp) { - struct ntb_transport_ctx *nt = qp->transport; struct pci_dev *pdev; struct ntb_queue_entry *entry; u64 qp_bit; @@ -1662,12 +1729,27 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) pdev = qp->ndev->pdev; - if (qp->dma_chan) { - struct dma_chan *chan = qp->dma_chan; + if (qp->tx_dma_chan) { + struct dma_chan *chan = qp->tx_dma_chan; /* Putting the dma_chan to NULL will force any new traffic to be * processed by the CPU instead of the DAM engine */ - qp->dma_chan = NULL; + qp->tx_dma_chan = NULL; + + /* Try to be nice and wait for any queued DMA engine + * transactions to process before smashing it with a rock + */ + dma_sync_wait(chan, qp->last_cookie); + dmaengine_terminate_all(chan); + dma_release_channel(chan); + } + + if (qp->rx_dma_chan) { + struct dma_chan *chan = qp->rx_dma_chan; + /* Putting the dma_chan to NULL will force any new traffic to be + * processed by the CPU instead of the DAM engine + */ + qp->rx_dma_chan = NULL; /* Try to be nice and wait for any queued DMA engine * transactions to process before smashing it with a rock @@ -1689,18 +1771,23 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) qp->tx_handler = NULL; qp->event_handler = NULL; - while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) + kfree(entry); + + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) { + dev_warn(&pdev->dev, "Freeing item from non-empty rx_pend_q\n"); kfree(entry); + } - while ((entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q))) { - dev_warn(&pdev->dev, "Freeing item from a non-empty queue\n"); + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) { + dev_warn(&pdev->dev, "Freeing item from non-empty rx_post_q\n"); kfree(entry); } while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); - nt->qp_bitmap_free |= qp_bit; + qp->transport->qp_bitmap_free |= qp_bit; dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num); } @@ -1724,14 +1811,14 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len) if (!qp || qp->client_ready) return NULL; - entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q); if (!entry) return NULL; buf = entry->cb_data; *len = entry->len; - ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); return buf; } @@ -1757,15 +1844,18 @@ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, if (!qp) return -EINVAL; - entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q); + entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q); if (!entry) return -ENOMEM; entry->cb_data = cb; entry->buf = data; entry->len = len; + entry->flags = 0; + + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q); - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, &qp->rx_pend_q); + tasklet_schedule(&qp->rxc_db_work); return 0; } @@ -1796,7 +1886,7 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); if (!entry) { qp->tx_err_no_buf++; - return -ENOMEM; + return -EBUSY; } entry->cb_data = cb; @@ -1907,21 +1997,34 @@ EXPORT_SYMBOL_GPL(ntb_transport_qp_num); unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) { unsigned int max; + unsigned int copy_align; if (!qp) return 0; - if (!qp->dma_chan) + if (!qp->tx_dma_chan && !qp->rx_dma_chan) return qp->tx_max_frame - sizeof(struct ntb_payload_header); + copy_align = max(qp->tx_dma_chan->device->copy_align, + qp->rx_dma_chan->device->copy_align); + /* If DMA engine usage is possible, try to find the max size for that */ max = qp->tx_max_frame - sizeof(struct ntb_payload_header); - max -= max % (1 << qp->dma_chan->device->copy_align); + max -= max % (1 << copy_align); return max; } EXPORT_SYMBOL_GPL(ntb_transport_max_size); +unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp) +{ + unsigned int head = qp->tx_index; + unsigned int tail = qp->remote_rx_info->entry; + + return tail > head ? tail - head : qp->tx_max_entry + tail - head; +} +EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry); + static void ntb_transport_doorbell_callback(void *data, int vector) { struct ntb_transport_ctx *nt = data; |