From 50f9f97e70fa4679fa197cb6dea358329298b987 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 4 Mar 2013 10:59:54 -0700 Subject: ioatdma: make debug output more readable Making OP field a hex instead of integer to make it more readable. Also add the dump out of the NEXT field. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.h | 2 +- drivers/dma/ioat/dma_v3.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 53a4cbb78f47..95ae7b3139ec 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -179,7 +179,7 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, struct device *dev = to_dev(chan); dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" - " ctl: %#x (op: %d int_en: %d compl: %d)\n", id, + " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id, (unsigned long long) tx->phys, (unsigned long long) hw->next, tx->cookie, tx->flags, hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index e8336cce360b..dc8dcfb7a8a6 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -605,7 +605,7 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct int i; dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", desc_id(desc), (unsigned long long) desc->txd.phys, (unsigned long long) (pq_ex ? pq_ex->next : pq->next), desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, @@ -617,6 +617,7 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct (unsigned long long) pq_get_src(descs, i), pq->coef[i]); dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); } static struct dma_async_tx_descriptor * -- cgit v1.2.3 From 9a37f644046c2f5b7889be642487e6f9dd58c5d7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Feb 2013 09:20:36 -0700 Subject: ioatdma: allow all channels to have irq coalescing support Looks like only the RAID channels are allowed to have irq coalescing support in the existing code. Fixing that. The ioat3 cleanup code can handle memcpy ops anyways Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma_v3.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index dc8dcfb7a8a6..a1dcfb2670f7 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1331,15 +1331,9 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) } - if (is_raid_device) { - dma->device_tx_status = ioat3_tx_status; - device->cleanup_fn = ioat3_cleanup_event; - device->timer_fn = ioat3_timer_event; - } else { - dma->device_tx_status = ioat_dma_tx_status; - device->cleanup_fn = ioat2_cleanup_event; - device->timer_fn = ioat2_timer_event; - } + dma->device_tx_status = ioat3_tx_status; + device->cleanup_fn = ioat3_cleanup_event; + device->timer_fn = ioat3_timer_event; #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); -- cgit v1.2.3 From 570727b5520878d1263e33f118463d77d7fd92d1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 25 Mar 2013 14:37:31 -0700 Subject: ioatdma: Adding Haswell devid for ioatdma Adding Haswell PCI device IDs for ioatdma and simplify the detection of certain Xeon CPUs that has alignment bugs so that modifications can be changed at a single place going forward. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma_v3.c | 28 +++++++++++++++++++++++++++- drivers/dma/ioat/hw.h | 22 +++++++++++++++++----- drivers/dma/ioat/pci.c | 11 +++++++++++ 3 files changed, 55 insertions(+), 6 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index a1dcfb2670f7..ab5655eb0602 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1260,6 +1260,32 @@ static bool is_ivb_ioat(struct pci_dev *pdev) } +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } + +} + +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev); +} + int ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1280,7 +1306,7 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; dma->device_free_chan_resources = ioat2_free_chan_resources; - if (is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev)) + if (is_xeon_cb32(pdev)) dma->copy_align = 6; dma_cap_set(DMA_INTERRUPT, dma->cap_mask); diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 7cb74c62c719..8cfa07789888 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -30,11 +30,6 @@ #define IOAT_PCI_DID_SCNB 0x65FF #define IOAT_PCI_DID_SNB 0x402F -#define IOAT_VER_1_2 0x12 /* Version 1.2 */ -#define IOAT_VER_2_0 0x20 /* Version 2.0 */ -#define IOAT_VER_3_0 0x30 /* Version 3.0 */ -#define IOAT_VER_3_2 0x32 /* Version 3.2 */ - #define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 #define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 #define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 @@ -46,6 +41,23 @@ #define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e #define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f +#define PCI_DEVICE_ID_INTEL_IOAT_HSW0 0x2f20 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW1 0x2f21 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW2 0x2f22 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW3 0x2f23 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW4 0x2f24 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW5 0x2f25 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW6 0x2f26 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW7 0x2f27 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW8 0x2f2e +#define PCI_DEVICE_ID_INTEL_IOAT_HSW9 0x2f2f + +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ +#define IOAT_VER_3_2 0x32 /* Version 3.2 */ + + int system_has_dca_enabled(struct pci_dev *pdev); struct ioat_dma_descriptor { diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 71c7ecd80fac..67c8e83bd90b 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -94,6 +94,17 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); -- cgit v1.2.3 From 0132bcef76301a8dc7794c893fd9342a04059082 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Mar 2013 15:42:35 -0700 Subject: ioatdma: Adding PCI IDs for Intel Atom S1200 product family ioatdma devices These should be good for the IOAT DMA devices on the Intel Atom S1269, S1279, and S1289 platforms. We are also adding IOAT v3.3 definition for the new DMA engine. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/hw.h | 6 ++++++ drivers/dma/ioat/pci.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 8cfa07789888..ce431f5a9b2a 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -52,10 +52,16 @@ #define PCI_DEVICE_ID_INTEL_IOAT_HSW8 0x2f2e #define PCI_DEVICE_ID_INTEL_IOAT_HSW9 0x2f2f +#define PCI_DEVICE_ID_INTEL_IOAT_BWD0 0x0C50 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD1 0x0C51 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD2 0x0C52 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD3 0x0C53 + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ #define IOAT_VER_3_2 0x32 /* Version 3.2 */ +#define IOAT_VER_3_3 0x33 /* Version 3.3 */ int system_has_dca_enabled(struct pci_dev *pdev); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 67c8e83bd90b..1f632968d4fb 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -105,6 +105,12 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + /* I/OAT v3.3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); -- cgit v1.2.3 From d92a8d7cbb6941d5d985ccb3453a2ac5c92f60e4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Mar 2013 15:42:41 -0700 Subject: ioatdma: Add 64bit chansts register read for ioat v3.3. The channel status register for v3.3 is now 64bit. Use readq if available on v3.3 platforms. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 95ae7b3139ec..9285caadf825 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -201,7 +201,7 @@ ioat_chan_by_index(struct ioatdma_device *device, int index) return device->idx[index]; } -static inline u64 ioat_chansts(struct ioat_chan_common *chan) +static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) { u8 ver = chan->device->version; u64 status; @@ -218,6 +218,26 @@ static inline u64 ioat_chansts(struct ioat_chan_common *chan) return status; } +#if BITS_PER_LONG == 64 + +static inline u64 ioat_chansts(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u64 status; + + /* With IOAT v3.3 the status register is 64bit. */ + if (ver >= IOAT_VER_3_3) + status = readq(chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); + else + status = ioat_chansts_32(chan); + + return status; +} + +#else +#define ioat_chansts ioat_chansts_32 +#endif + static inline void ioat_start(struct ioat_chan_common *chan) { u8 ver = chan->device->version; -- cgit v1.2.3 From 8a52b9ff1154a68b6a2a8da9a31a87e52f5f6418 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Mar 2013 15:42:47 -0700 Subject: ioatdma: channel reset scheme fixup on Intel Atom S1200 platforms The Intel Atom S1200 family ioatdma changed the channel reset behavior. It does a reset similar to PCI FLR by resetting all the MSIX registers. We have to re-init msix interrupts because of this. This workaround is only specific to this platform and is not expected to carry over to the later generations. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.c | 8 +- drivers/dma/ioat/dma.h | 10 ++ drivers/dma/ioat/dma_v3.c | 236 ++++++++++++++++++++++++++++++---------------- 3 files changed, 171 insertions(+), 83 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 1879a5942bfc..17a2393b3e25 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -892,7 +892,7 @@ MODULE_PARM_DESC(ioat_interrupt_style, * ioat_dma_setup_interrupts - setup interrupt handler * @device: ioat device */ -static int ioat_dma_setup_interrupts(struct ioatdma_device *device) +int ioat_dma_setup_interrupts(struct ioatdma_device *device) { struct ioat_chan_common *chan; struct pci_dev *pdev = device->pdev; @@ -941,6 +941,7 @@ msix: } } intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + device->irq_mode = IOAT_MSIX; goto done; msix_single_vector: @@ -956,6 +957,7 @@ msix_single_vector: pci_disable_msix(pdev); goto msi; } + device->irq_mode = IOAT_MSIX_SINGLE; goto done; msi: @@ -969,6 +971,7 @@ msi: pci_disable_msi(pdev); goto intx; } + device->irq_mode = IOAT_MSIX; goto done; intx: @@ -977,6 +980,7 @@ intx: if (err) goto err_no_irq; + device->irq_mode = IOAT_INTX; done: if (device->intr_quirk) device->intr_quirk(device); @@ -987,9 +991,11 @@ done: err_no_irq: /* Disable all interrupt generation */ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + device->irq_mode = IOAT_NOIRQ; dev_err(dev, "no usable interrupts\n"); return err; } +EXPORT_SYMBOL(ioat_dma_setup_interrupts); static void ioat_disable_interrupts(struct ioatdma_device *device) { diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 9285caadf825..b16902cd2684 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -48,6 +48,14 @@ */ #define NULL_DESC_BUFFER_SIZE 1 +enum ioat_irq_mode { + IOAT_NOIRQ = 0, + IOAT_MSIX, + IOAT_MSIX_SINGLE, + IOAT_MSI, + IOAT_INTX +}; + /** * struct ioatdma_device - internal representation of a IOAT device * @pdev: PCI-Express device @@ -77,6 +85,7 @@ struct ioatdma_device { struct msix_entry msix_entries[4]; struct ioat_chan_common *idx[4]; struct dca_provider *dca; + enum ioat_irq_mode irq_mode; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); int (*reset_hw)(struct ioat_chan_common *chan); @@ -341,6 +350,7 @@ bool ioat_cleanup_preamble(struct ioat_chan_common *chan, dma_addr_t *phys_complete); void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); void ioat_kobject_del(struct ioatdma_device *device); +int ioat_dma_setup_interrupts(struct ioatdma_device *device); extern const struct sysfs_ops ioat_sysfs_ops; extern struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index ab5655eb0602..65b912aa1012 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -111,6 +111,103 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2], pq->coef[idx] = coef; } +static bool is_jf_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} + +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; + } +} + +static bool is_ivb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_IVB0: + case PCI_DEVICE_ID_INTEL_IOAT_IVB1: + case PCI_DEVICE_ID_INTEL_IOAT_IVB2: + case PCI_DEVICE_ID_INTEL_IOAT_IVB3: + case PCI_DEVICE_ID_INTEL_IOAT_IVB4: + case PCI_DEVICE_ID_INTEL_IOAT_IVB5: + case PCI_DEVICE_ID_INTEL_IOAT_IVB6: + case PCI_DEVICE_ID_INTEL_IOAT_IVB7: + case PCI_DEVICE_ID_INTEL_IOAT_IVB8: + case PCI_DEVICE_ID_INTEL_IOAT_IVB9: + return true; + default: + return false; + } + +} + +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } + +} + +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev); +} + +static bool is_bwd_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD0: + case PCI_DEVICE_ID_INTEL_IOAT_BWD1: + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + return true; + default: + return false; + } +} + static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, int idx) { @@ -1168,6 +1265,56 @@ static int ioat3_dma_self_test(struct ioatdma_device *device) return 0; } +static int ioat3_irq_reinit(struct ioatdma_device *device) +{ + int msixcnt = device->common.chancnt; + struct pci_dev *pdev = device->pdev; + int i; + struct msix_entry *msix; + struct ioat_chan_common *chan; + int err = 0; + + switch (device->irq_mode) { + case IOAT_MSIX: + + for (i = 0; i < msixcnt; i++) { + msix = &device->msix_entries[i]; + chan = ioat_chan_by_index(device, i); + devm_free_irq(&pdev->dev, msix->vector, chan); + } + + pci_disable_msix(pdev); + break; + + case IOAT_MSIX_SINGLE: + msix = &device->msix_entries[0]; + chan = ioat_chan_by_index(device, 0); + devm_free_irq(&pdev->dev, msix->vector, chan); + pci_disable_msix(pdev); + break; + + case IOAT_MSI: + chan = ioat_chan_by_index(device, 0); + devm_free_irq(&pdev->dev, pdev->irq, chan); + pci_disable_msi(pdev); + break; + + case IOAT_INTX: + chan = ioat_chan_by_index(device, 0); + devm_free_irq(&pdev->dev, pdev->irq, chan); + break; + + default: + return 0; + } + + device->irq_mode = IOAT_NOIRQ; + + err = ioat_dma_setup_interrupts(device); + + return err; +} + static int ioat3_reset_hw(struct ioat_chan_common *chan) { /* throw away whatever the channel was doing and get it @@ -1199,91 +1346,16 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan) if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - return ioat2_reset_sync(chan, msecs_to_jiffies(200)); -} - -static bool is_jf_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_JSF0: - case PCI_DEVICE_ID_INTEL_IOAT_JSF1: - case PCI_DEVICE_ID_INTEL_IOAT_JSF2: - case PCI_DEVICE_ID_INTEL_IOAT_JSF3: - case PCI_DEVICE_ID_INTEL_IOAT_JSF4: - case PCI_DEVICE_ID_INTEL_IOAT_JSF5: - case PCI_DEVICE_ID_INTEL_IOAT_JSF6: - case PCI_DEVICE_ID_INTEL_IOAT_JSF7: - case PCI_DEVICE_ID_INTEL_IOAT_JSF8: - case PCI_DEVICE_ID_INTEL_IOAT_JSF9: - return true; - default: - return false; - } -} - -static bool is_snb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_SNB0: - case PCI_DEVICE_ID_INTEL_IOAT_SNB1: - case PCI_DEVICE_ID_INTEL_IOAT_SNB2: - case PCI_DEVICE_ID_INTEL_IOAT_SNB3: - case PCI_DEVICE_ID_INTEL_IOAT_SNB4: - case PCI_DEVICE_ID_INTEL_IOAT_SNB5: - case PCI_DEVICE_ID_INTEL_IOAT_SNB6: - case PCI_DEVICE_ID_INTEL_IOAT_SNB7: - case PCI_DEVICE_ID_INTEL_IOAT_SNB8: - case PCI_DEVICE_ID_INTEL_IOAT_SNB9: - return true; - default: - return false; - } -} - -static bool is_ivb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_IVB0: - case PCI_DEVICE_ID_INTEL_IOAT_IVB1: - case PCI_DEVICE_ID_INTEL_IOAT_IVB2: - case PCI_DEVICE_ID_INTEL_IOAT_IVB3: - case PCI_DEVICE_ID_INTEL_IOAT_IVB4: - case PCI_DEVICE_ID_INTEL_IOAT_IVB5: - case PCI_DEVICE_ID_INTEL_IOAT_IVB6: - case PCI_DEVICE_ID_INTEL_IOAT_IVB7: - case PCI_DEVICE_ID_INTEL_IOAT_IVB8: - case PCI_DEVICE_ID_INTEL_IOAT_IVB9: - return true; - default: - return false; - } - -} - -static bool is_hsw_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_HSW0: - case PCI_DEVICE_ID_INTEL_IOAT_HSW1: - case PCI_DEVICE_ID_INTEL_IOAT_HSW2: - case PCI_DEVICE_ID_INTEL_IOAT_HSW3: - case PCI_DEVICE_ID_INTEL_IOAT_HSW4: - case PCI_DEVICE_ID_INTEL_IOAT_HSW5: - case PCI_DEVICE_ID_INTEL_IOAT_HSW6: - case PCI_DEVICE_ID_INTEL_IOAT_HSW7: - case PCI_DEVICE_ID_INTEL_IOAT_HSW8: - case PCI_DEVICE_ID_INTEL_IOAT_HSW9: - return true; - default: - return false; + err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); + if (err) { + dev_err(&pdev->dev, "Failed to reset!\n"); + return err; } -} + if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev)) + err = ioat3_irq_reinit(device); -static bool is_xeon_cb32(struct pci_dev *pdev) -{ - return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev); + return err; } int ioat3_dma_probe(struct ioatdma_device *device, int dca) -- cgit v1.2.3 From 6ead7e484957f2ae9bf2085688518d95ce75ab80 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Mar 2013 15:42:59 -0700 Subject: ioatdma: skip legacy reset bits since v3.3 plattform doesn't need it Make it so only 3.2 and earlier platform need the PCI config register clearings since this implementation does not have the registers. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma_v3.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 65b912aa1012..804522c1300a 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1331,20 +1331,28 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan) chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - /* clear any pending errors */ - err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); - if (err) { - dev_err(&pdev->dev, "channel error register unreachable\n"); - return err; - } - pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + if (device->version < IOAT_VER_3_3) { + /* clear any pending errors */ + err = pci_read_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, + "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, chanerr); - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + pci_write_config_dword(pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + 0x10); + } + } err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); if (err) { -- cgit v1.2.3 From 3f09ede4237fe4691ac687c6c43cb4c1a530777b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Mar 2013 15:43:09 -0700 Subject: ioatdma: Removing PQ val disable for cb3.3 The PQ Val ops work on the newer hardware so we should actually provide support for it and remove the disabling bits. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 2 - drivers/dma/ioat/dma.h | 1 + drivers/dma/ioat/dma_v3.c | 134 +++++++++++++++++++++++++++++++++++++++---- drivers/dma/ioat/registers.h | 2 + 4 files changed, 125 insertions(+), 14 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index aeaea32bcfda..d5c58e839b27 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -63,8 +63,6 @@ config INTEL_IOATDMA depends on PCI && X86 select DMA_ENGINE select DCA - select ASYNC_TX_DISABLE_PQ_VAL_DMA - select ASYNC_TX_DISABLE_XOR_VAL_DMA help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index b16902cd2684..976eba8c06c7 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -39,6 +39,7 @@ #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) #define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) +#define to_pdev(ioat_chan) ((ioat_chan)->device->pdev) #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 804522c1300a..9628ba2ff70c 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -79,6 +79,8 @@ static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; static const u8 pq_idx_to_desc = 0xf8; static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static void ioat3_eh(struct ioat2_dma_chan *ioat); + static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) { struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; @@ -347,6 +349,33 @@ static bool desc_has_ext(struct ioat_ring_ent *desc) return false; } +static u64 ioat3_get_current_completion(struct ioat_chan_common *chan) +{ + u64 phys_complete; + u64 completion; + + completion = *chan->completion; + phys_complete = ioat_chansts_to_addr(completion); + + dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + (unsigned long long) phys_complete); + + return phys_complete; +} + +static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, + u64 *phys_complete) +{ + *phys_complete = ioat3_get_current_completion(chan); + if (*phys_complete == chan->last_completion) + return false; + + clear_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + return true; +} + /** * __cleanup - reclaim used descriptors * @ioat: channel (ring) to clean @@ -365,6 +394,16 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued); + /* + * At restart of the channel, the completion address and the + * channel status will be 0 due to starting a new chain. Since + * it's new chain and the first descriptor "fails", there is + * nothing to clean up. We do not want to reap the entire submitted + * chain due to this 0 address value and then BUG. + */ + if (!phys_complete) + return; + active = ioat2_ring_active(ioat); for (i = 0; i < active && !seen_current; i++) { struct dma_async_tx_descriptor *tx; @@ -411,11 +450,22 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) static void ioat3_cleanup(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; + u64 phys_complete; spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) + + if (ioat3_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); + + if (is_ioat_halted(*chan->completion)) { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + + if (chanerr & IOAT_CHANERR_HANDLE_MASK) { + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + ioat3_eh(ioat); + } + } + spin_unlock_bh(&chan->cleanup_lock); } @@ -430,15 +480,77 @@ static void ioat3_cleanup_event(unsigned long data) static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; + u64 phys_complete; ioat2_quiesce(chan, 0); - if (ioat_cleanup_preamble(chan, &phys_complete)) + if (ioat3_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); __ioat2_restart_chan(ioat); } +static void ioat3_eh(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + struct pci_dev *pdev = to_pdev(chan); + struct ioat_dma_descriptor *hw; + u64 phys_complete; + struct ioat_ring_ent *desc; + u32 err_handled = 0; + u32 chanerr_int; + u32 chanerr; + + /* cleanup so tail points to descriptor that caused the error */ + if (ioat3_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); + + dev_dbg(to_dev(chan), "%s: error = %x:%x\n", + __func__, chanerr, chanerr_int); + + desc = ioat2_get_ring_ent(ioat, ioat->tail); + hw = desc->hw; + dump_desc_dbg(ioat, desc); + + switch (hw->ctl_f.op) { + case IOAT_OP_XOR_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + break; + case IOAT_OP_PQ_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { + *desc->result |= SUM_CHECK_Q_RESULT; + err_handled |= IOAT_CHANERR_XOR_Q_ERR; + } + break; + } + + /* fault on unhandled error or spurious halt */ + if (chanerr ^ err_handled || chanerr == 0) { + dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n", + __func__, chanerr, err_handled); + BUG(); + } + + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); + + /* mark faulting descriptor as complete */ + *chan->completion = desc->txd.phys; + + spin_lock_bh(&ioat->prep_lock); + ioat3_restart_channel(ioat); + spin_unlock_bh(&ioat->prep_lock); +} + static void check_active(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; @@ -1441,15 +1553,13 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) device->cleanup_fn = ioat3_cleanup_event; device->timer_fn = ioat3_timer_event; - #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA - dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); - dma->device_prep_dma_pq_val = NULL; - #endif + if (is_xeon_cb32(pdev)) { + dma_cap_clear(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = NULL; - #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA - dma_cap_clear(DMA_XOR_VAL, dma->cap_mask); - dma->device_prep_dma_xor_val = NULL; - #endif + dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = NULL; + } err = ioat_probe(device); if (err) diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 1391798542b6..c1ad1946809e 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -93,6 +93,8 @@ #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 #define IOAT_CHANCTRL_INT_REARM 0x0001 #define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ + IOAT_CHANCTRL_ERR_INT_EN |\ + IOAT_CHANCTRL_ERR_COMPLETION_EN |\ IOAT_CHANCTRL_ANY_ERR_ABORT_EN) #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ -- cgit v1.2.3 From eceec44ecd7f3285468a684e7216df2316b178f3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Mar 2013 15:43:15 -0700 Subject: ioatdma: skip silicon bug workaround for pq_align for cb3.3 The alignment workaround is only necessary for cb3.2 or earlier platforms. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma_v3.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 9628ba2ff70c..cf97e3f16924 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1521,10 +1521,14 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma_cap_set(DMA_XOR_VAL, dma->cap_mask); dma->device_prep_dma_xor_val = ioat3_prep_xor_val; } + if (cap & IOAT_CAP_PQ) { is_raid_device = true; dma_set_maxpq(dma, 8, 0); - dma->pq_align = 6; + if (is_xeon_cb32(pdev)) + dma->pq_align = 6; + else + dma->pq_align = 0; dma_cap_set(DMA_PQ, dma->cap_mask); dma->device_prep_dma_pq = ioat3_prep_pq; @@ -1534,7 +1538,10 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) if (!(cap & IOAT_CAP_XOR)) { dma->max_xor = 8; - dma->xor_align = 6; + if (is_xeon_cb32(pdev)) + dma->xor_align = 6; + else + dma->xor_align = 0; dma_cap_set(DMA_XOR, dma->cap_mask); dma->device_prep_dma_xor = ioat3_prep_pqxor; @@ -1543,6 +1550,7 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; } } + if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { dma_cap_set(DMA_MEMSET, dma->cap_mask); dma->device_prep_dma_memset = ioat3_prep_memset_lock; -- cgit v1.2.3 From e0884772d323b745c65baa65df391b1c70829410 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 10 Apr 2013 16:44:20 -0700 Subject: ioatdma: Removing hw bug workaround for CB3.x .2 and earlier CB3.2 and earlier hardware has silicon bugs that are no longer needed with the new hardware. We don't have to use a NULL op to signal interrupt for RAID ops any longer. This code make sure the legacy workarounds only happen on legacy hardware. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma_v3.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index cf97e3f16924..639311598f35 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -837,6 +837,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *compl_desc; struct ioat_ring_ent *desc; struct ioat_ring_ent *ext; @@ -847,6 +848,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, u32 offset = 0; u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; int i, s, idx, with_ext, num_descs; + int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0; dev_dbg(to_dev(chan), "%s\n", __func__); /* the engine requires at least two sources (we provide @@ -872,7 +874,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, * order. */ if (likely(num_descs) && - ioat2_check_space_lock(ioat, num_descs+1) == 0) + ioat2_check_space_lock(ioat, num_descs + cb32) == 0) idx = ioat->head; else return NULL; @@ -926,16 +928,23 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); dump_pq_desc_dbg(ioat, desc, ext); - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + } + /* we leave the channel locked to ensure in order submission */ return &compl_desc->txd; -- cgit v1.2.3 From 7727eaa4490b7244934fe31f05e7329f30715267 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 15 Apr 2013 10:25:56 -0700 Subject: ioatdma: Adding support for 16 src PQ ops and super extended descriptors v3.3 introduced 16 sources PQ operations. This also introduced super extended descriptors to support the 16 srcs operations. This patch adds support for the 16 sources ops and in turn adds the super extended descriptors for those ops. 5 SED pools are created depending on the descriptor sizes. An SED can be a 64 bytes sized descriptor or larger and must be physically contiguous. A kmem cache pool is created for allocating the software descriptor that manages the hardware descriptor. The super extended descriptor will take place of extended descriptor under certain operations and be "attached" to the op descriptor during operation. This is a new feature for ioatdma v3.3. Signed-off-by: Dave Jiang Acked-by: Dan Williams Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.h | 17 ++ drivers/dma/ioat/dma_v2.h | 2 + drivers/dma/ioat/dma_v3.c | 394 ++++++++++++++++++++++++++++++++++++++++--- drivers/dma/ioat/hw.h | 43 ++++- drivers/dma/ioat/pci.c | 3 + drivers/dma/ioat/registers.h | 1 + 6 files changed, 438 insertions(+), 22 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 976eba8c06c7..35d74028773a 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -81,6 +81,9 @@ struct ioatdma_device { void __iomem *reg_base; struct pci_pool *dma_pool; struct pci_pool *completion_pool; +#define MAX_SED_POOLS 5 + struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; + struct kmem_cache *sed_pool; struct dma_device common; u8 version; struct msix_entry msix_entries[4]; @@ -141,6 +144,20 @@ struct ioat_dma_chan { u16 active; }; +/** + * struct ioat_sed_ent - wrapper around super extended hardware descriptor + * @hw: hardware SED + * @sed_dma: dma address for the SED + * @list: list member + * @parent: point to the dma descriptor that's the parent + */ +struct ioat_sed_ent { + struct ioat_sed_raw_descriptor *hw; + dma_addr_t dma; + struct ioat_ring_ent *parent; + unsigned int hw_pool; +}; + static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) { return container_of(c, struct ioat_chan_common, common); diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index e100f644e344..29bf9448035d 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -137,6 +137,7 @@ struct ioat_ring_ent { #ifdef DEBUG int id; #endif + struct ioat_sed_ent *sed; }; static inline struct ioat_ring_ent * @@ -157,6 +158,7 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) int ioat2_dma_probe(struct ioatdma_device *dev, int dca); int ioat3_dma_probe(struct ioatdma_device *dev, int dca); +void ioat3_dma_remove(struct ioatdma_device *dev); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 639311598f35..71e113dfc8cc 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -55,7 +55,7 @@ /* * Support routines for v3+ hardware */ - +#include #include #include #include @@ -70,6 +70,10 @@ /* ioat hardware assumes at least two sources for raid operations */ #define src_cnt_to_sw(x) ((x) + 2) #define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor @@ -77,7 +81,18 @@ static const u8 xor_idx_to_desc = 0xe0; static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; + +/* + * technically sources 1 and 2 do not require SED, but the op will have + * at least 9 descriptors so that's irrelevant. + */ +static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1 }; static void ioat3_eh(struct ioat2_dma_chan *ioat); @@ -103,6 +118,13 @@ static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) return raw->field[pq_idx_to_field[idx]]; } +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + static void pq_set_src(struct ioat_raw_descriptor *descs[2], dma_addr_t addr, u32 offset, u8 coef, int idx) { @@ -113,6 +135,12 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2], pq->coef[idx] = coef; } +static int sed_get_pq16_pool_idx(int src_cnt) +{ + + return pq16_idx_to_sed[src_cnt]; +} + static bool is_jf_ioat(struct pci_dev *pdev) { switch (pdev->device) { @@ -210,6 +238,52 @@ static bool is_bwd_ioat(struct pci_dev *pdev) } } +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(device->sed_pool, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(device->sed_pool, sed); + return NULL; + } + + return sed; +} + +void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) +{ + if (!sed) + return; + + dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(device->sed_pool, sed); +} + static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, int idx) { @@ -322,6 +396,54 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, } break; } + case IOAT_OP_PQ_16S: + case IOAT_OP_PQ_VAL_16S: { + struct ioat_pq_descriptor *pq = desc->pq; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[4]; + int i; + + /* in the 'continue' case don't unmap the dests as sources */ + if (dmaf_p_disabled_continue(flags)) + src_cnt--; + else if (dmaf_continue(flags)) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *)pq; + descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw); + descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]); + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = pq16_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* the dests are sources in pq validate operations */ + if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, + len, PCI_DMA_TODEVICE, + flags, 0); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, + len, PCI_DMA_TODEVICE, + flags, 0); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + } + break; + } default: dev_err(&pdev->dev, "%s: unknown op type: %#x\n", __func__, desc->hw->ctl_f.op); @@ -386,6 +508,7 @@ static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) { struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *desc; bool seen_current = false; int idx = ioat->tail, i; @@ -430,6 +553,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) BUG_ON(i + 1 >= active); i++; } + + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat3_free_sed(device, desc->sed); + desc->sed = NULL; + } } smp_mb(); /* finish all descriptor reads before incrementing tail */ ioat->tail = idx + i; @@ -522,6 +651,7 @@ static void ioat3_eh(struct ioat2_dma_chan *ioat) } break; case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { *desc->result |= SUM_CHECK_P_RESULT; err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; @@ -814,7 +944,8 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct int i; dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", desc_id(desc), (unsigned long long) desc->txd.phys, (unsigned long long) (pq_ex ? pq_ex->next : pq->next), desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, @@ -829,6 +960,41 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); } +static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(&ioat->base); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + static struct dma_async_tx_descriptor * __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, const dma_addr_t *dst, const dma_addr_t *src, @@ -950,11 +1116,115 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, return &compl_desc->txd; } +static struct dma_async_tx_descriptor * +__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function only handles src_cnt 9 - 16 */ + BUG_ON(src_cnt < 9); + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(chan), "%s\n", __func__); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. + */ + if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) + idx = ioat->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(device, + sed_get_pq16_pool_idx(src_cnt)); + if (!desc->sed) { + dev_err(to_dev(chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + static struct dma_async_tx_descriptor * ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, unsigned long flags) { + struct dma_device *dma = chan->device; + /* specify valid address for disabled result */ if (flags & DMA_PREP_PQ_DISABLE_P) dst[0] = dst[1]; @@ -974,11 +1244,20 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, single_source_coef[0] = scf[0]; single_source_coef[1] = 0; - return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - } else - return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, - len, flags); + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } } struct dma_async_tx_descriptor * @@ -986,6 +1265,8 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags) { + struct dma_device *dma = chan->device; + /* specify valid address for disabled result */ if (flags & DMA_PREP_PQ_DISABLE_P) pq[0] = pq[1]; @@ -997,14 +1278,18 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, */ *pqres = 0; - return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); } static struct dma_async_tx_descriptor * ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags) { + struct dma_device *dma = chan->device; unsigned char scf[src_cnt]; dma_addr_t pq[2]; @@ -1013,8 +1298,11 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = dst; /* specify valid address for disabled result */ - return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); } struct dma_async_tx_descriptor * @@ -1022,6 +1310,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags) { + struct dma_device *dma = chan->device; unsigned char scf[src_cnt]; dma_addr_t pq[2]; @@ -1035,8 +1324,12 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = pq[0]; /* specify valid address for disabled result */ - return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, - len, flags); + + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); } static struct dma_async_tx_descriptor * @@ -1533,11 +1826,17 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) if (cap & IOAT_CAP_PQ) { is_raid_device = true; - dma_set_maxpq(dma, 8, 0); - if (is_xeon_cb32(pdev)) - dma->pq_align = 6; - else + + if (cap & IOAT_CAP_RAID16SS) { + dma_set_maxpq(dma, 16, 0); dma->pq_align = 0; + } else { + dma_set_maxpq(dma, 8, 0); + if (is_xeon_cb32(pdev)) + dma->pq_align = 6; + else + dma->pq_align = 0; + } dma_cap_set(DMA_PQ, dma->cap_mask); dma->device_prep_dma_pq = ioat3_prep_pq; @@ -1546,11 +1845,16 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_pq_val = ioat3_prep_pq_val; if (!(cap & IOAT_CAP_XOR)) { - dma->max_xor = 8; - if (is_xeon_cb32(pdev)) - dma->xor_align = 6; - else + if (cap & IOAT_CAP_RAID16SS) { + dma->max_xor = 16; dma->xor_align = 0; + } else { + dma->max_xor = 8; + if (is_xeon_cb32(pdev)) + dma->xor_align = 6; + else + dma->xor_align = 0; + } dma_cap_set(DMA_XOR, dma->cap_mask); dma->device_prep_dma_xor = ioat3_prep_pqxor; @@ -1578,6 +1882,30 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_pq_val = NULL; } + /* starting with CB3.3 super extended descriptors are supported */ + if (cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; + + /* allocate sw descriptor pool for SED */ + device->sed_pool = kmem_cache_create("ioat_sed", + sizeof(struct ioat_sed_ent), 0, 0, NULL); + if (!device->sed_pool) + return -ENOMEM; + + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); + + /* allocate SED DMA pool */ + device->sed_hw_pool[i] = dma_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!device->sed_hw_pool[i]) + goto sed_pool_cleanup; + + } + } + err = ioat_probe(device); if (err) return err; @@ -1599,4 +1927,28 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) device->dca = ioat3_dca_init(pdev, device->reg_base); return 0; + +sed_pool_cleanup: + if (device->sed_pool) { + int i; + kmem_cache_destroy(device->sed_pool); + + for (i = 0; i < MAX_SED_POOLS; i++) + if (device->sed_hw_pool[i]) + dma_pool_destroy(device->sed_hw_pool[i]); + } + + return -ENOMEM; +} + +void ioat3_dma_remove(struct ioatdma_device *device) +{ + if (device->sed_pool) { + int i; + kmem_cache_destroy(device->sed_pool); + + for (i = 0; i < MAX_SED_POOLS; i++) + if (device->sed_hw_pool[i]) + dma_pool_destroy(device->sed_hw_pool[i]); + } } diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index ce431f5a9b2a..d10570db6e7d 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -183,6 +183,8 @@ struct ioat_pq_descriptor { unsigned int rsvd:11; #define IOAT_OP_PQ 0x89 #define IOAT_OP_PQ_VAL 0x8a + #define IOAT_OP_PQ_16S 0xa0 + #define IOAT_OP_PQ_VAL_16S 0xa1 unsigned int op:8; } ctl_f; }; @@ -190,7 +192,10 @@ struct ioat_pq_descriptor { uint64_t p_addr; uint64_t next; uint64_t src_addr2; - uint64_t src_addr3; + union { + uint64_t src_addr3; + uint64_t sed_addr; + }; uint8_t coef[8]; uint64_t q_addr; }; @@ -239,4 +244,40 @@ struct ioat_pq_update_descriptor { struct ioat_raw_descriptor { uint64_t field[8]; }; + +struct ioat_pq16a_descriptor { + uint8_t coef[8]; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t src_addr9; +}; + +struct ioat_pq16b_descriptor { + uint64_t src_addr10; + uint64_t src_addr11; + uint64_t src_addr12; + uint64_t src_addr13; + uint64_t src_addr14; + uint64_t src_addr15; + uint64_t src_addr16; + uint64_t rsvd; +}; + +union ioat_sed_pq_descriptor { + struct ioat_pq16a_descriptor a; + struct ioat_pq16b_descriptor b; +}; + +#define SED_SIZE 64 + +struct ioat_sed_raw_descriptor { + uint64_t a[8]; + uint64_t b[8]; + uint64_t c[8]; +}; + #endif diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 1f632968d4fb..2c8d560e6334 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -207,6 +207,9 @@ static void ioat_remove(struct pci_dev *pdev) if (!device) return; + if (device->version >= IOAT_VER_3_0) + ioat3_dma_remove(device); + dev_err(&pdev->dev, "Removing dma and dca services\n"); if (device->dca) { unregister_dca_provider(device->dca, &pdev->dev); diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index c1ad1946809e..efdd47e47b82 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -79,6 +79,7 @@ #define IOAT_CAP_APIC 0x00000080 #define IOAT_CAP_XOR 0x00000100 #define IOAT_CAP_PQ 0x00000200 +#define IOAT_CAP_RAID16SS 0x00020000 #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ -- cgit v1.2.3 From d302398da99956a329c467f195b50d5aaf38fb75 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 10 Apr 2013 16:44:32 -0700 Subject: ioatdma: S1200 platforms ioatdma channel 2 and 3 falsely advertise RAID cap This workaround checks for channel 2&3 and remove RAID cap. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma_v3.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 71e113dfc8cc..b00b000f238b 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -238,6 +238,18 @@ static bool is_bwd_ioat(struct pci_dev *pdev) } } +static bool is_bwd_noraid(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + return true; + default: + return false; + } + +} + static void pq16_set_src(struct ioat_raw_descriptor *desc[3], dma_addr_t addr, u32 offset, u8 coef, int idx) { @@ -1808,6 +1820,9 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + if (is_bwd_noraid(pdev)) + cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); + /* dca is incompatible with raid operations */ if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); -- cgit v1.2.3 From 75c6f0ab480657269b5014e0e457c7b18ba8597e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 10 Apr 2013 16:44:39 -0700 Subject: ioatdma: Adding write back descriptor error status support for ioatdma 3.3 v3.3 provides support for write back descriptor error status. This allows reporting of errors in a descriptor field. In supporting this, certain errors such as P/Q validation errors no longer halts the channel. The DMA engine can continue to execute until the end of the chain and allow software to report the "errors" up the stack. We are also going to mask those error interrupts and handle them when the "chain" has completed at the end. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.h | 1 + drivers/dma/ioat/dma_v3.c | 111 ++++++++++++++++++++++++++++++++++--------- drivers/dma/ioat/hw.h | 17 ++++++- drivers/dma/ioat/registers.h | 1 + 4 files changed, 105 insertions(+), 25 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 35d74028773a..54fb7b9ff9aa 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -90,6 +90,7 @@ struct ioatdma_device { struct ioat_chan_common *idx[4]; struct dca_provider *dca; enum ioat_irq_mode irq_mode; + u32 cap; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); int (*reset_hw)(struct ioat_chan_common *chan); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index b00b000f238b..28f8957bafe2 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -510,6 +510,36 @@ static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, return true; } +static void +desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + switch (hw->ctl_f.op) { + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + { + struct ioat_pq_descriptor *pq = desc->pq; + + /* check if there's error written */ + if (!pq->dwbes_f.wbes) + return; + + /* need to set a chanerr var for checking to clear later */ + + if (pq->dwbes_f.p_val_err) + *desc->result |= SUM_CHECK_P_RESULT; + + if (pq->dwbes_f.q_val_err) + *desc->result |= SUM_CHECK_Q_RESULT; + + return; + } + default: + return; + } +} + /** * __cleanup - reclaim used descriptors * @ioat: channel (ring) to clean @@ -547,6 +577,11 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); desc = ioat2_get_ring_ent(ioat, idx + i); dump_desc_dbg(ioat, desc); + + /* set err stat if we are using dwbes */ + if (device->cap & IOAT_CAP_DWBES) + desc_get_errstat(ioat, desc); + tx = &desc->txd; if (tx->cookie) { dma_cookie_complete(tx); @@ -1090,6 +1125,9 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, pq->q_addr = dst[1] + offset; pq->ctl = 0; pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (device->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; pq->ctl_f.src_cnt = src_cnt_to_hw(s); pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); @@ -1206,6 +1244,9 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, pq->ctl = 0; pq->ctl_f.op = op; pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (device->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); @@ -1792,6 +1833,32 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan) return err; } +static void ioat3_intr_quirk(struct ioatdma_device *device) +{ + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + u32 errmask; + + dma = &device->common; + + /* + * if we have descriptor write back error status, we mask the + * error interrupts + */ + if (device->cap & IOAT_CAP_DWBES) { + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + errmask = readl(chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | + IOAT_CHANERR_XOR_Q_ERR; + writel(errmask, chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + } + } +} + int ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1801,11 +1868,11 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) struct ioat_chan_common *chan; bool is_raid_device = false; int err; - u32 cap; device->enumerate_channels = ioat2_enumerate_channels; device->reset_hw = ioat3_reset_hw; device->self_test = ioat3_dma_self_test; + device->intr_quirk = ioat3_intr_quirk; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; @@ -1818,16 +1885,16 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma_cap_set(DMA_INTERRUPT, dma->cap_mask); dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; - cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); if (is_bwd_noraid(pdev)) - cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); + device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); /* dca is incompatible with raid operations */ - if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) - cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); - if (cap & IOAT_CAP_XOR) { + if (device->cap & IOAT_CAP_XOR) { is_raid_device = true; dma->max_xor = 8; dma->xor_align = 6; @@ -1839,10 +1906,15 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = ioat3_prep_xor_val; } - if (cap & IOAT_CAP_PQ) { + if (device->cap & IOAT_CAP_PQ) { is_raid_device = true; - if (cap & IOAT_CAP_RAID16SS) { + dma->device_prep_dma_pq = ioat3_prep_pq; + dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + dma_cap_set(DMA_PQ, dma->cap_mask); + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + + if (device->cap & IOAT_CAP_RAID16SS) { dma_set_maxpq(dma, 16, 0); dma->pq_align = 0; } else { @@ -1853,14 +1925,13 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->pq_align = 0; } - dma_cap_set(DMA_PQ, dma->cap_mask); - dma->device_prep_dma_pq = ioat3_prep_pq; - - dma_cap_set(DMA_PQ_VAL, dma->cap_mask); - dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + if (!(device->cap & IOAT_CAP_XOR)) { + dma->device_prep_dma_xor = ioat3_prep_pqxor; + dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; + dma_cap_set(DMA_XOR, dma->cap_mask); + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - if (!(cap & IOAT_CAP_XOR)) { - if (cap & IOAT_CAP_RAID16SS) { + if (device->cap & IOAT_CAP_RAID16SS) { dma->max_xor = 16; dma->xor_align = 0; } else { @@ -1870,16 +1941,10 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) else dma->xor_align = 0; } - - dma_cap_set(DMA_XOR, dma->cap_mask); - dma->device_prep_dma_xor = ioat3_prep_pqxor; - - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; } } - if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { + if (is_raid_device && (device->cap & IOAT_CAP_FILL_BLOCK)) { dma_cap_set(DMA_MEMSET, dma->cap_mask); dma->device_prep_dma_memset = ioat3_prep_memset_lock; } @@ -1898,7 +1963,7 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) } /* starting with CB3.3 super extended descriptors are supported */ - if (cap & IOAT_CAP_RAID16SS) { + if (device->cap & IOAT_CAP_RAID16SS) { char pool_name[14]; int i; diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index d10570db6e7d..5ee57d402a6e 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -165,7 +165,17 @@ struct ioat_xor_ext_descriptor { }; struct ioat_pq_descriptor { - uint32_t size; + union { + uint32_t size; + uint32_t dwbes; + struct { + unsigned int rsvd:25; + unsigned int p_val_err:1; + unsigned int q_val_err:1; + unsigned int rsvd1:4; + unsigned int wbes:1; + } dwbes_f; + }; union { uint32_t ctl; struct { @@ -180,7 +190,10 @@ struct ioat_pq_descriptor { unsigned int hint:1; unsigned int p_disable:1; unsigned int q_disable:1; - unsigned int rsvd:11; + unsigned int rsvd2:2; + unsigned int wb_en:1; + unsigned int prl_en:1; + unsigned int rsvd3:7; #define IOAT_OP_PQ 0x89 #define IOAT_OP_PQ_VAL 0x8a #define IOAT_OP_PQ_16S 0xa0 diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index efdd47e47b82..2f1cfa0f1f47 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -79,6 +79,7 @@ #define IOAT_CAP_APIC 0x00000080 #define IOAT_CAP_XOR 0x00000100 #define IOAT_CAP_PQ 0x00000200 +#define IOAT_CAP_DWBES 0x00002000 #define IOAT_CAP_RAID16SS 0x00020000 #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ -- cgit v1.2.3 From e6a30fec08b421a59064437a7d990c70a80a7e7f Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Tue, 16 Apr 2013 13:41:26 +0800 Subject: ioatdma: ioat3_alloc_sed can be static Reported-by: Fengguang Wu Signed-off-by: Fengguang Wu Acked-by: Dave Jiang Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma_v3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/dma/ioat') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 28f8957bafe2..ca6ea9b3551b 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -266,7 +266,7 @@ static void pq16_set_src(struct ioat_raw_descriptor *desc[3], pq16->coef[idx - 8] = coef; } -struct ioat_sed_ent * +static struct ioat_sed_ent * ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) { struct ioat_sed_ent *sed; @@ -287,7 +287,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) return sed; } -void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) +static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) { if (!sed) return; -- cgit v1.2.3