diff options
-rw-r--r-- | drivers/dma/dmaengine.c | 17 | ||||
-rw-r--r-- | drivers/dma/idxd/device.c | 31 | ||||
-rw-r--r-- | drivers/dma/idxd/idxd.h | 3 | ||||
-rw-r--r-- | drivers/dma/idxd/init.c | 5 | ||||
-rw-r--r-- | drivers/dma/idxd/registers.h | 25 | ||||
-rw-r--r-- | drivers/dma/idxd/submit.c | 2 | ||||
-rw-r--r-- | drivers/dma/ioat/dca.c | 10 | ||||
-rw-r--r-- | drivers/dma/pl330.c | 2 | ||||
-rw-r--r-- | drivers/dma/ti/k3-udma-private.c | 2 | ||||
-rw-r--r-- | drivers/dma/ti/omap-dma.c | 37 | ||||
-rw-r--r-- | drivers/dma/xilinx/xilinx_dma.c | 40 |
11 files changed, 111 insertions, 63 deletions
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 7974fa0400d8..962cbb5e5f7f 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1039,16 +1039,15 @@ static int get_dma_id(struct dma_device *device) static int __dma_async_device_channel_register(struct dma_device *device, struct dma_chan *chan) { - int rc = 0; + int rc; chan->local = alloc_percpu(typeof(*chan->local)); if (!chan->local) - goto err_out; + return -ENOMEM; chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL); if (!chan->dev) { - free_percpu(chan->local); - chan->local = NULL; - goto err_out; + rc = -ENOMEM; + goto err_free_local; } /* @@ -1061,7 +1060,8 @@ static int __dma_async_device_channel_register(struct dma_device *device, if (chan->chan_id < 0) { pr_err("%s: unable to alloc ida for chan: %d\n", __func__, chan->chan_id); - goto err_out; + rc = chan->chan_id; + goto err_free_dev; } chan->dev->device.class = &dma_devclass; @@ -1082,9 +1082,10 @@ static int __dma_async_device_channel_register(struct dma_device *device, mutex_lock(&device->chan_mutex); ida_free(&device->chan_ida, chan->chan_id); mutex_unlock(&device->chan_mutex); - err_out: - free_percpu(chan->local); + err_free_dev: kfree(chan->dev); + err_free_local: + free_percpu(chan->local); return rc; } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 200b9109cacf..663344987e3f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -271,7 +271,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq) resource_size_t start; start = pci_resource_start(pdev, IDXD_WQ_BAR); - start = start + wq->id * IDXD_PORTAL_SIZE; + start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED); wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE); if (!wq->dportal) @@ -295,7 +295,7 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) int i, wq_offset; lockdep_assert_held(&idxd->dev_lock); - memset(&wq->wqcfg, 0, sizeof(wq->wqcfg)); + memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; wq->size = 0; wq->group = NULL; @@ -304,8 +304,8 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) clear_bit(WQ_FLAG_DEDICATED, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); - for (i = 0; i < 8; i++) { - wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32); + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); iowrite32(0, idxd->reg_base + wq_offset); dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wq_offset, @@ -539,10 +539,10 @@ static int idxd_wq_config_write(struct idxd_wq *wq) if (!wq->group) return 0; - memset(&wq->wqcfg, 0, sizeof(union wqcfg)); + memset(wq->wqcfg, 0, idxd->wqcfg_size); /* byte 0-3 */ - wq->wqcfg.wq_size = wq->size; + wq->wqcfg->wq_size = wq->size; if (wq->size == 0) { dev_warn(dev, "Incorrect work queue size: 0\n"); @@ -550,22 +550,21 @@ static int idxd_wq_config_write(struct idxd_wq *wq) } /* bytes 4-7 */ - wq->wqcfg.wq_thresh = wq->threshold; + wq->wqcfg->wq_thresh = wq->threshold; /* byte 8-11 */ - wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL); - wq->wqcfg.mode = 1; - - wq->wqcfg.priority = wq->priority; + wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); + wq->wqcfg->mode = 1; + wq->wqcfg->priority = wq->priority; /* bytes 12-15 */ - wq->wqcfg.max_xfer_shift = ilog2(wq->max_xfer_bytes); - wq->wqcfg.max_batch_shift = ilog2(wq->max_batch_size); + wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); + wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); dev_dbg(dev, "WQ %d CFGs\n", wq->id); - for (i = 0; i < 8; i++) { - wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32); - iowrite32(wq->wqcfg.bits[i], idxd->reg_base + wq_offset); + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); + iowrite32(wq->wqcfg->bits[i], idxd->reg_base + wq_offset); dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wq_offset, ioread32(idxd->reg_base + wq_offset)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c64df197e724..d48f193daacc 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -103,7 +103,7 @@ struct idxd_wq { u32 priority; enum idxd_wq_state state; unsigned long flags; - union wqcfg wqcfg; + union wqcfg *wqcfg; u32 vec_ptr; /* interrupt steering */ struct dsa_hw_desc **hw_descs; int num_descs; @@ -183,6 +183,7 @@ struct idxd_device { int max_wq_size; int token_limit; int nr_tokens; /* non-reserved tokens */ + unsigned int wqcfg_size; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 11e5ce168177..0a4432b063b5 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -178,6 +178,9 @@ static int idxd_setup_internals(struct idxd_device *idxd) wq->idxd_cdev.minor = -1; wq->max_xfer_bytes = idxd->max_xfer_bytes; wq->max_batch_size = idxd->max_batch_size; + wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL); + if (!wq->wqcfg) + return -ENOMEM; } for (i = 0; i < idxd->max_engines; i++) { @@ -251,6 +254,8 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size); idxd->max_wqs = idxd->hw.wq_cap.num_wqs; dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs); + idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN); + dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size); /* reading operation capabilities */ for (i = 0; i < 4; i++) { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index a39e7ae6b3d9..54390334c243 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -8,7 +8,7 @@ #define IDXD_MMIO_BAR 0 #define IDXD_WQ_BAR 2 -#define IDXD_PORTAL_SIZE 0x4000 +#define IDXD_PORTAL_SIZE PAGE_SIZE /* MMIO Device BAR0 Registers */ #define IDXD_VER_OFFSET 0x00 @@ -43,7 +43,8 @@ union wq_cap_reg { struct { u64 total_wq_size:16; u64 num_wqs:8; - u64 rsvd:24; + u64 wqcfg_size:4; + u64 rsvd:20; u64 shared_mode:1; u64 dedicated_mode:1; u64 rsvd2:1; @@ -55,6 +56,7 @@ union wq_cap_reg { u64 bits; } __packed; #define IDXD_WQCAP_OFFSET 0x20 +#define IDXD_WQCFG_MIN 5 union group_cap_reg { struct { @@ -333,4 +335,23 @@ union wqcfg { }; u32 bits[8]; } __packed; + +/* + * This macro calculates the offset into the WQCFG register + * idxd - struct idxd * + * n - wq id + * ofs - the index of the 32b dword for the config register + * + * The WQCFG register block is divided into groups per each wq. The n index + * allows us to move to the register group that's for that particular wq. + * Each register is 32bits. The ofs gives us the number of register to access. + */ +#define WQCFG_OFFSET(_idxd_dev, n, ofs) \ +({\ + typeof(_idxd_dev) __idxd_dev = (_idxd_dev); \ + (__idxd_dev)->wqcfg_offset + (n) * (__idxd_dev)->wqcfg_size + sizeof(u32) * (ofs); \ +}) + +#define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32)) + #endif diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 156a1ee233aa..417048e3c42a 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -74,7 +74,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED); + portal = wq->dportal; /* * The wmb() flushes writes to coherent DMA data before possibly * triggering a DMA read. The wmb() is necessary even on UP because diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 0be385587c4c..289c59ed74b9 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -40,16 +40,6 @@ #define DCA2_TAG_MAP_BYTE3 0x82 #define DCA2_TAG_MAP_BYTE4 0x82 -/* verify if tag map matches expected values */ -static inline int dca2_tag_map_valid(u8 *tag_map) -{ - return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) && - (tag_map[1] == DCA2_TAG_MAP_BYTE1) && - (tag_map[2] == DCA2_TAG_MAP_BYTE2) && - (tag_map[3] == DCA2_TAG_MAP_BYTE3) && - (tag_map[4] == DCA2_TAG_MAP_BYTE4)); -} - /* * "Legacy" DCA systems do not implement the DCA register set in the * I/OAT device. Software needs direct support for their tag mappings. diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index e9f0101d92fa..0f5c19370f6d 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2799,7 +2799,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, * If burst size is smaller than bus width then make sure we only * transfer one at a time to avoid a burst stradling an MFIFO entry. */ - if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width) + if (burst * 8 < pl330->pcfg.data_bus_width) desc->rqcfg.brst_len = 1; desc->bytes_requested = len; diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c index aa24e554f7b4..8563a392f30b 100644 --- a/drivers/dma/ti/k3-udma-private.c +++ b/drivers/dma/ti/k3-udma-private.c @@ -83,7 +83,7 @@ EXPORT_SYMBOL(xudma_rflow_is_gp); #define XUDMA_GET_PUT_RESOURCE(res) \ struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id) \ { \ - return __udma_reserve_##res(ud, false, id); \ + return __udma_reserve_##res(ud, UDMA_TP_NORMAL, id); \ } \ EXPORT_SYMBOL(xudma_##res##_get); \ \ diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c index c9fe5e3a6b55..268a08058714 100644 --- a/drivers/dma/ti/omap-dma.c +++ b/drivers/dma/ti/omap-dma.c @@ -1522,29 +1522,38 @@ static void omap_dma_free(struct omap_dmadev *od) } } +/* Currently used by omap2 & 3 to block deeper SoC idle states */ +static bool omap_dma_busy(struct omap_dmadev *od) +{ + struct omap_chan *c; + int lch = -1; + + while (1) { + lch = find_next_bit(od->lch_bitmap, od->lch_count, lch + 1); + if (lch >= od->lch_count) + break; + c = od->lch_map[lch]; + if (!c) + continue; + if (omap_dma_chan_read(c, CCR) & CCR_ENABLE) + return true; + } + + return false; +} + /* Currently only used for omap2. For omap1, also a check for lcd_dma is needed */ static int omap_dma_busy_notifier(struct notifier_block *nb, unsigned long cmd, void *v) { struct omap_dmadev *od; - struct omap_chan *c; - int lch = -1; od = container_of(nb, struct omap_dmadev, nb); switch (cmd) { case CPU_CLUSTER_PM_ENTER: - while (1) { - lch = find_next_bit(od->lch_bitmap, od->lch_count, - lch + 1); - if (lch >= od->lch_count) - break; - c = od->lch_map[lch]; - if (!c) - continue; - if (omap_dma_chan_read(c, CCR) & CCR_ENABLE) - return NOTIFY_BAD; - } + if (omap_dma_busy(od)) + return NOTIFY_BAD; break; case CPU_CLUSTER_PM_ENTER_FAILED: case CPU_CLUSTER_PM_EXIT: @@ -1595,6 +1604,8 @@ static int omap_dma_context_notifier(struct notifier_block *nb, switch (cmd) { case CPU_CLUSTER_PM_ENTER: + if (omap_dma_busy(od)) + return NOTIFY_BAD; omap_dma_context_save(od); break; case CPU_CLUSTER_PM_ENTER_FAILED: diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index ecff35402860..22faea653ea8 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -517,8 +517,8 @@ struct xilinx_dma_device { #define to_dma_tx_descriptor(tx) \ container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) #define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ - readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \ - cond, delay_us, timeout_us) + readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \ + val, cond, delay_us, timeout_us) /* IO accessors */ static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) @@ -948,8 +948,10 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, { struct xilinx_cdma_tx_segment *cdma_seg; struct xilinx_axidma_tx_segment *axidma_seg; + struct xilinx_aximcdma_tx_segment *aximcdma_seg; struct xilinx_cdma_desc_hw *cdma_hw; struct xilinx_axidma_desc_hw *axidma_hw; + struct xilinx_aximcdma_desc_hw *aximcdma_hw; struct list_head *entry; u32 residue = 0; @@ -961,13 +963,23 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, cdma_hw = &cdma_seg->hw; residue += (cdma_hw->control - cdma_hw->status) & chan->xdev->max_buffer_len; - } else { + } else if (chan->xdev->dma_config->dmatype == + XDMA_TYPE_AXIDMA) { axidma_seg = list_entry(entry, struct xilinx_axidma_tx_segment, node); axidma_hw = &axidma_seg->hw; residue += (axidma_hw->control - axidma_hw->status) & chan->xdev->max_buffer_len; + } else { + aximcdma_seg = + list_entry(entry, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_hw = &aximcdma_seg->hw; + residue += + (aximcdma_hw->control - aximcdma_hw->status) & + chan->xdev->max_buffer_len; } } @@ -1135,7 +1147,7 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) * ((i + 1) % XILINX_DMA_NUM_DESCS)); chan->seg_mv[i].phys = chan->seg_p + - sizeof(*chan->seg_v) * i; + sizeof(*chan->seg_mv) * i; list_add_tail(&chan->seg_mv[i].node, &chan->free_seg_list); } @@ -1560,7 +1572,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan) { struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; - struct xilinx_axidma_tx_segment *tail_segment; + struct xilinx_aximcdma_tx_segment *tail_segment; u32 reg; /* @@ -1582,7 +1594,7 @@ static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan) tail_desc = list_last_entry(&chan->pending_list, struct xilinx_dma_tx_descriptor, node); tail_segment = list_last_entry(&tail_desc->segments, - struct xilinx_axidma_tx_segment, node); + struct xilinx_aximcdma_tx_segment, node); reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest)); @@ -1864,6 +1876,7 @@ static void append_desc_queue(struct xilinx_dma_chan *chan, struct xilinx_vdma_tx_segment *tail_segment; struct xilinx_dma_tx_descriptor *tail_desc; struct xilinx_axidma_tx_segment *axidma_tail_segment; + struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment; struct xilinx_cdma_tx_segment *cdma_tail_segment; if (list_empty(&chan->pending_list)) @@ -1885,11 +1898,17 @@ static void append_desc_queue(struct xilinx_dma_chan *chan, struct xilinx_cdma_tx_segment, node); cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; - } else { + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { axidma_tail_segment = list_last_entry(&tail_desc->segments, struct xilinx_axidma_tx_segment, node); axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else { + aximcdma_tail_segment = + list_last_entry(&tail_desc->segments, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; } /* @@ -2836,10 +2855,11 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, chan->stop_transfer = xilinx_dma_stop_transfer; } - /* check if SG is enabled (only for AXIDMA and CDMA) */ + /* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */ if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) { - if (dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & - XILINX_DMA_DMASR_SG_MASK) + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA || + dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_SG_MASK) chan->has_sg = true; dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id, chan->has_sg ? "enabled" : "disabled"); |