summaryrefslogtreecommitdiff
path: root/drivers/dma/at_hdmac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma/at_hdmac.c')
-rw-r--r--drivers/dma/at_hdmac.c259
1 files changed, 208 insertions, 51 deletions
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 59892126d175..58d406230d89 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -48,6 +48,8 @@
BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+#define ATC_MAX_DSCR_TRIALS 10
+
/*
* Initial number of descriptors to allocate for each channel. This could
* be increased during dma usage.
@@ -285,28 +287,19 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan,
*
* @current_len: the number of bytes left before reading CTRLA
* @ctrla: the value of CTRLA
- * @desc: the descriptor containing the transfer width
*/
-static inline int atc_calc_bytes_left(int current_len, u32 ctrla,
- struct at_desc *desc)
+static inline int atc_calc_bytes_left(int current_len, u32 ctrla)
{
- return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width);
-}
+ u32 btsize = (ctrla & ATC_BTSIZE_MAX);
+ u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla);
-/**
- * atc_calc_bytes_left_from_reg - calculates the number of bytes left according
- * to the current value of CTRLA.
- *
- * @current_len: the number of bytes left before reading CTRLA
- * @atchan: the channel to read CTRLA for
- * @desc: the descriptor containing the transfer width
- */
-static inline int atc_calc_bytes_left_from_reg(int current_len,
- struct at_dma_chan *atchan, struct at_desc *desc)
-{
- u32 ctrla = channel_readl(atchan, CTRLA);
-
- return atc_calc_bytes_left(current_len, ctrla, desc);
+ /*
+ * According to the datasheet, when reading the Control A Register
+ * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the
+ * number of transfers completed on the Source Interface.
+ * So btsize is always a number of source width transfers.
+ */
+ return current_len - (btsize << src_width);
}
/**
@@ -320,7 +313,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
struct at_desc *desc_first = atc_first_active(atchan);
struct at_desc *desc;
int ret;
- u32 ctrla, dscr;
+ u32 ctrla, dscr, trials;
/*
* If the cookie doesn't match to the currently running transfer then
@@ -346,15 +339,82 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
* the channel's DSCR register and compare it against the value
* of the hardware linked list structure of each child
* descriptor.
+ *
+ * The CTRLA register provides us with the amount of data
+ * already read from the source for the current child
+ * descriptor. So we can compute a more accurate residue by also
+ * removing the number of bytes corresponding to this amount of
+ * data.
+ *
+ * However, the DSCR and CTRLA registers cannot be read both
+ * atomically. Hence a race condition may occur: the first read
+ * register may refer to one child descriptor whereas the second
+ * read may refer to a later child descriptor in the list
+ * because of the DMA transfer progression in between the two
+ * reads.
+ *
+ * One solution could have been to pause the DMA transfer, read
+ * the DSCR and CTRLA then resume the DMA transfer. Nonetheless,
+ * this approach presents some drawbacks:
+ * - If the DMA transfer is paused, RX overruns or TX underruns
+ * are more likely to occur depending on the system latency.
+ * Taking the USART driver as an example, it uses a cyclic DMA
+ * transfer to read data from the Receive Holding Register
+ * (RHR) to avoid RX overruns since the RHR is not protected
+ * by any FIFO on most Atmel SoCs. So pausing the DMA transfer
+ * to compute the residue would break the USART driver design.
+ * - The atc_pause() function masks interrupts but we'd rather
+ * avoid doing so for system latency purposes.
+ *
+ * Then we'd rather use another solution: the DSCR is read a
+ * first time, the CTRLA is read in turn, next the DSCR is read
+ * a second time. If the two consecutive read values of the DSCR
+ * are the same then we assume both refer to the very same
+ * child descriptor as well as the CTRLA value read in between
+ * does. For cyclic transfers, the assumption is that a full loop
+ * is "not so fast".
+ * If the two DSCR values are different, we read again the CTRLA
+ * then the DSCR till two consecutive read values from DSCR are
+ * equal or till the maximum number of trials is reached.
+ * This algorithm is very unlikely not to find a stable value for
+ * DSCR.
*/
- ctrla = channel_readl(atchan, CTRLA);
- rmb(); /* ensure CTRLA is read before DSCR */
dscr = channel_readl(atchan, DSCR);
+ rmb(); /* ensure DSCR is read before CTRLA */
+ ctrla = channel_readl(atchan, CTRLA);
+ for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) {
+ u32 new_dscr;
+
+ rmb(); /* ensure DSCR is read after CTRLA */
+ new_dscr = channel_readl(atchan, DSCR);
+
+ /*
+ * If the DSCR register value has not changed inside the
+ * DMA controller since the previous read, we assume
+ * that both the dscr and ctrla values refer to the
+ * very same descriptor.
+ */
+ if (likely(new_dscr == dscr))
+ break;
+
+ /*
+ * DSCR has changed inside the DMA controller, so the
+ * previously read value of CTRLA may refer to an already
+ * processed descriptor hence could be outdated.
+ * We need to update ctrla to match the current
+ * descriptor.
+ */
+ dscr = new_dscr;
+ rmb(); /* ensure DSCR is read before CTRLA */
+ ctrla = channel_readl(atchan, CTRLA);
+ }
+ if (unlikely(trials >= ATC_MAX_DSCR_TRIALS))
+ return -ETIMEDOUT;
/* for the first descriptor we can be more accurate */
if (desc_first->lli.dscr == dscr)
- return atc_calc_bytes_left(ret, ctrla, desc_first);
+ return atc_calc_bytes_left(ret, ctrla);
ret -= desc_first->len;
list_for_each_entry(desc, &desc_first->tx_list, desc_node) {
@@ -365,16 +425,14 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
}
/*
- * For the last descriptor in the chain we can calculate
+ * For the current descriptor in the chain we can calculate
* the remaining bytes using the channel's register.
- * Note that the transfer width of the first and last
- * descriptor may differ.
*/
- if (!desc->lli.dscr)
- ret = atc_calc_bytes_left_from_reg(ret, atchan, desc);
+ ret = atc_calc_bytes_left(ret, ctrla);
} else {
/* single transfer */
- ret = atc_calc_bytes_left_from_reg(ret, atchan, desc_first);
+ ctrla = channel_readl(atchan, CTRLA);
+ ret = atc_calc_bytes_left(ret, ctrla);
}
return ret;
@@ -390,6 +448,7 @@ static void
atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
{
struct dma_async_tx_descriptor *txd = &desc->txd;
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
dev_vdbg(chan2dev(&atchan->chan_common),
"descriptor %u complete\n", txd->cookie);
@@ -398,6 +457,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
if (!atc_chan_is_cyclic(atchan))
dma_cookie_complete(txd);
+ /* If the transfer was a memset, free our temporary buffer */
+ if (desc->memset) {
+ dma_pool_free(atdma->memset_pool, desc->memset_vaddr,
+ desc->memset_paddr);
+ desc->memset = false;
+ }
+
/* move children to free_list */
list_splice_init(&desc->tx_list, &atchan->free_list);
/* move myself to free_list */
@@ -659,14 +725,14 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
size_t len = 0;
int i;
+ if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
+ return NULL;
+
dev_info(chan2dev(chan),
"%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n",
__func__, xt->src_start, xt->dst_start, xt->numf,
xt->frame_size, flags);
- if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
- return NULL;
-
/*
* The controller can only "skip" X bytes every Y bytes, so we
* need to make sure we are given a template that fit that
@@ -726,7 +792,6 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
desc->txd.cookie = -EBUSY;
desc->total_len = desc->len = len;
- desc->tx_width = dwidth;
/* set end-of-link to the last link descriptor of list*/
set_desc_eol(desc);
@@ -804,10 +869,6 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
first->txd.cookie = -EBUSY;
first->total_len = len;
- /* set transfer width for the calculation of the residue */
- first->tx_width = src_width;
- prev->tx_width = src_width;
-
/* set end-of-link to the last link descriptor of list*/
set_desc_eol(desc);
@@ -820,6 +881,93 @@ err_desc_get:
return NULL;
}
+/**
+ * atc_prep_dma_memset - prepare a memset operation
+ * @chan: the channel to prepare operation on
+ * @dest: operation DMA (bus) destination address
+ * @value: value to set memory buffer to
+ * @len: operation length
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc = NULL;
+ size_t xfer_count;
+ u32 ctrla;
+ u32 ctrlb;
+
+ dev_vdbg(chan2dev(chan), "%s: d0x%x v0x%x l0x%zx f0x%lx\n", __func__,
+ dest, value, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
+ return NULL;
+ }
+
+ if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
+ dev_dbg(chan2dev(chan), "%s: buffer is not aligned\n",
+ __func__);
+ return NULL;
+ }
+
+ xfer_count = len >> 2;
+ if (xfer_count > ATC_BTSIZE_MAX) {
+ dev_err(chan2dev(chan), "%s: buffer is too big\n",
+ __func__);
+ return NULL;
+ }
+
+ ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN
+ | ATC_SRC_ADDR_MODE_FIXED
+ | ATC_DST_ADDR_MODE_INCR
+ | ATC_FC_MEM2MEM;
+
+ ctrla = ATC_SRC_WIDTH(2) |
+ ATC_DST_WIDTH(2);
+
+ desc = atc_desc_get(atchan);
+ if (!desc) {
+ dev_err(chan2dev(chan), "%s: can't get a descriptor\n",
+ __func__);
+ return NULL;
+ }
+
+ desc->memset_vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC,
+ &desc->memset_paddr);
+ if (!desc->memset_vaddr) {
+ dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n",
+ __func__);
+ goto err_put_desc;
+ }
+
+ *desc->memset_vaddr = value;
+ desc->memset = true;
+
+ desc->lli.saddr = desc->memset_paddr;
+ desc->lli.daddr = dest;
+ desc->lli.ctrla = ctrla | xfer_count;
+ desc->lli.ctrlb = ctrlb;
+
+ desc->txd.cookie = -EBUSY;
+ desc->len = len;
+ desc->total_len = len;
+
+ /* set end-of-link on the descriptor */
+ set_desc_eol(desc);
+
+ desc->txd.flags = flags;
+
+ return &desc->txd;
+
+err_put_desc:
+ atc_desc_put(atchan, desc);
+ return NULL;
+}
+
/**
* atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
@@ -956,10 +1104,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
first->txd.cookie = -EBUSY;
first->total_len = total_len;
- /* set transfer width for the calculation of the residue */
- first->tx_width = reg_width;
- prev->tx_width = reg_width;
-
/* first link descriptor of list is responsible of flags */
first->txd.flags = flags; /* client is in control of this ack */
@@ -1077,12 +1221,6 @@ atc_prep_dma_sg(struct dma_chan *chan,
desc->txd.cookie = 0;
desc->len = len;
- /*
- * Although we only need the transfer width for the first and
- * the last descriptor, its easier to set it to all descriptors.
- */
- desc->tx_width = src_width;
-
atc_desc_chain(&first, &prev, desc);
/* update the lengths and addresses for the next loop cycle */
@@ -1256,7 +1394,6 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
/* First descriptor of the chain embedds additional information */
first->txd.cookie = -EBUSY;
first->total_len = buf_len;
- first->tx_width = reg_width;
return &first->txd;
@@ -1713,6 +1850,8 @@ static int __init at_dma_probe(struct platform_device *pdev)
dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask);
dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask);
dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask);
dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask);
@@ -1776,7 +1915,16 @@ static int __init at_dma_probe(struct platform_device *pdev)
if (!atdma->dma_desc_pool) {
dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
err = -ENOMEM;
- goto err_pool_create;
+ goto err_desc_pool_create;
+ }
+
+ /* create a pool of consistent memory blocks for memset blocks */
+ atdma->memset_pool = dma_pool_create("at_hdmac_memset_pool",
+ &pdev->dev, sizeof(int), 4, 0);
+ if (!atdma->memset_pool) {
+ dev_err(&pdev->dev, "No memory for memset dma pool\n");
+ err = -ENOMEM;
+ goto err_memset_pool_create;
}
/* clear any pending interrupt */
@@ -1822,6 +1970,11 @@ static int __init at_dma_probe(struct platform_device *pdev)
if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) {
+ atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset;
+ atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES;
+ }
+
if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) {
atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg;
/* controller can do slave DMA: can trigger cyclic transfers */
@@ -1842,8 +1995,9 @@ static int __init at_dma_probe(struct platform_device *pdev)
dma_writel(atdma, EN, AT_DMA_ENABLE);
- dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n",
+ dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s%s), %d channels\n",
dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "",
dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "",
dma_has_cap(DMA_SG, atdma->dma_common.cap_mask) ? "sg-cpy " : "",
plat_dat->nr_channels);
@@ -1868,8 +2022,10 @@ static int __init at_dma_probe(struct platform_device *pdev)
err_of_dma_controller_register:
dma_async_device_unregister(&atdma->dma_common);
+ dma_pool_destroy(atdma->memset_pool);
+err_memset_pool_create:
dma_pool_destroy(atdma->dma_desc_pool);
-err_pool_create:
+err_desc_pool_create:
free_irq(platform_get_irq(pdev, 0), atdma);
err_irq:
clk_disable_unprepare(atdma->clk);
@@ -1894,6 +2050,7 @@ static int at_dma_remove(struct platform_device *pdev)
at_dma_off(atdma);
dma_async_device_unregister(&atdma->dma_common);
+ dma_pool_destroy(atdma->memset_pool);
dma_pool_destroy(atdma->dma_desc_pool);
free_irq(platform_get_irq(pdev, 0), atdma);