diff options
Diffstat (limited to 'drivers/dma')
28 files changed, 2581 insertions, 716 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 16bd36983cd5..8d0e5ebe1dac 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -332,7 +332,7 @@ config MPC512X_DMA config MV_XOR bool "Marvell XOR engine support" - depends on PLAT_ORION + depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST select DMA_ENGINE select DMA_ENGINE_RAID select ASYNC_TX_ENABLE_CHANNEL_SWITCH diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 9b42c0588550..81db1c4811ce 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -107,16 +107,20 @@ struct pl08x_driver_data; /** * struct vendor_data - vendor-specific config parameters for PL08x derivatives * @channels: the number of channels available in this variant + * @signals: the number of request signals available from the hardware * @dualmaster: whether this version supports dual AHB masters or not. * @nomadik: whether the channels have Nomadik security extension bits * that need to be checked for permission before use and some registers are * missing * @pl080s: whether this version is a PL080S, which has separate register and * LLI word for transfer size. + * @max_transfer_size: the maximum single element transfer size for this + * PL08x variant. */ struct vendor_data { u8 config_offset; u8 channels; + u8 signals; bool dualmaster; bool nomadik; bool pl080s; @@ -235,7 +239,7 @@ struct pl08x_dma_chan { struct virt_dma_chan vc; struct pl08x_phy_chan *phychan; const char *name; - const struct pl08x_channel_data *cd; + struct pl08x_channel_data *cd; struct dma_slave_config cfg; struct pl08x_txd *at; struct pl08x_driver_data *host; @@ -1909,6 +1913,12 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, if (slave) { chan->cd = &pl08x->pd->slave_channels[i]; + /* + * Some implementations have muxed signals, whereas some + * use a mux in front of the signals and need dynamic + * assignment of signals. + */ + chan->signal = i; pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; @@ -2050,40 +2060,33 @@ static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct pl08x_driver_data *pl08x = ofdma->of_dma_data; - struct pl08x_channel_data *data; - struct pl08x_dma_chan *chan; struct dma_chan *dma_chan; + struct pl08x_dma_chan *plchan; if (!pl08x) return NULL; - if (dma_spec->args_count != 2) + if (dma_spec->args_count != 2) { + dev_err(&pl08x->adev->dev, + "DMA channel translation requires two cells\n"); return NULL; + } dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); - if (dma_chan) - return dma_get_slave_channel(dma_chan); - - chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), - GFP_KERNEL); - if (!chan) + if (!dma_chan) { + dev_err(&pl08x->adev->dev, + "DMA slave channel not found\n"); return NULL; + } - data = (void *)&chan[1]; - data->bus_id = "(none)"; - data->periph_buses = dma_spec->args[1]; - - chan->cd = data; - chan->host = pl08x; - chan->slave = true; - chan->name = data->bus_id; - chan->state = PL08X_CHAN_IDLE; - chan->signal = dma_spec->args[0]; - chan->vc.desc_free = pl08x_desc_free; - - vchan_init(&chan->vc, &pl08x->slave); + plchan = to_pl08x_chan(dma_chan); + dev_dbg(&pl08x->adev->dev, + "translated channel for signal %d\n", + dma_spec->args[0]); - return dma_get_slave_channel(&chan->vc.chan); + /* Augment channel data for applicable AHB buses */ + plchan->cd->periph_buses = dma_spec->args[1]; + return dma_get_slave_channel(dma_chan); } static int pl08x_of_probe(struct amba_device *adev, @@ -2091,9 +2094,11 @@ static int pl08x_of_probe(struct amba_device *adev, struct device_node *np) { struct pl08x_platform_data *pd; + struct pl08x_channel_data *chanp = NULL; u32 cctl_memcpy = 0; u32 val; int ret; + int i; pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); if (!pd) @@ -2195,6 +2200,27 @@ static int pl08x_of_probe(struct amba_device *adev, /* Use the buses that can access memory, obviously */ pd->memcpy_channel.periph_buses = pd->mem_buses; + /* + * Allocate channel data for all possible slave channels (one + * for each possible signal), channels will then be allocated + * for a device and have it's AHB interfaces set up at + * translation time. + */ + chanp = devm_kcalloc(&adev->dev, + pl08x->vd->signals, + sizeof(struct pl08x_channel_data), + GFP_KERNEL); + if (!chanp) + return -ENOMEM; + + pd->slave_channels = chanp; + for (i = 0; i < pl08x->vd->signals; i++) { + /* chanp->periph_buses will be assigned at translation */ + chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i); + chanp++; + } + pd->num_slave_channels = pl08x->vd->signals; + pl08x->pd = pd; return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, @@ -2234,6 +2260,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_pl08x; } + /* Assign useful pointers to the driver state */ + pl08x->adev = adev; + pl08x->vd = vd; + /* Initialize memcpy engine */ dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); pl08x->memcpy.dev = &adev->dev; @@ -2284,10 +2314,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) } } - /* Assign useful pointers to the driver state */ - pl08x->adev = adev; - pl08x->vd = vd; - /* By default, AHB1 only. If dualmaster, from platform */ pl08x->lli_buses = PL08X_AHB1; pl08x->mem_buses = PL08X_AHB1; @@ -2438,6 +2464,7 @@ out_no_pl08x: static struct vendor_data vendor_pl080 = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 16, .dualmaster = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2445,6 +2472,7 @@ static struct vendor_data vendor_pl080 = { static struct vendor_data vendor_nomadik = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 32, .dualmaster = true, .nomadik = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, @@ -2453,6 +2481,7 @@ static struct vendor_data vendor_nomadik = { static struct vendor_data vendor_pl080s = { .config_offset = PL080S_CH_CONFIG, .channels = 8, + .signals = 32, .pl080s = true, .max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2460,6 +2489,7 @@ static struct vendor_data vendor_pl080s = { static struct vendor_data vendor_pl081 = { .config_offset = PL080_CH_CONFIG, .channels = 2, + .signals = 16, .dualmaster = false, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 996c4b00d323..6149b27c33ad 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -46,6 +46,9 @@ #include "virt-dma.h" +#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14 +#define BCM2835_DMA_CHAN_NAME_SIZE 8 + struct bcm2835_dmadev { struct dma_device ddev; spinlock_t lock; @@ -73,7 +76,6 @@ struct bcm2835_chan { struct list_head node; struct dma_slave_config cfg; - bool cyclic; unsigned int dreq; int ch; @@ -82,6 +84,9 @@ struct bcm2835_chan { void __iomem *chan_base; int irq_number; + unsigned int irq_flags; + + bool is_lite_channel; }; struct bcm2835_desc { @@ -89,47 +94,104 @@ struct bcm2835_desc { struct virt_dma_desc vd; enum dma_transfer_direction dir; - struct bcm2835_cb_entry *cb_list; - unsigned int frames; size_t size; + + bool cyclic; + + struct bcm2835_cb_entry cb_list[]; }; #define BCM2835_DMA_CS 0x00 #define BCM2835_DMA_ADDR 0x04 +#define BCM2835_DMA_TI 0x08 #define BCM2835_DMA_SOURCE_AD 0x0c #define BCM2835_DMA_DEST_AD 0x10 -#define BCM2835_DMA_NEXTCB 0x1C +#define BCM2835_DMA_LEN 0x14 +#define BCM2835_DMA_STRIDE 0x18 +#define BCM2835_DMA_NEXTCB 0x1c +#define BCM2835_DMA_DEBUG 0x20 /* DMA CS Control and Status bits */ -#define BCM2835_DMA_ACTIVE BIT(0) -#define BCM2835_DMA_INT BIT(2) +#define BCM2835_DMA_ACTIVE BIT(0) /* activate the DMA */ +#define BCM2835_DMA_END BIT(1) /* current CB has ended */ +#define BCM2835_DMA_INT BIT(2) /* interrupt status */ +#define BCM2835_DMA_DREQ BIT(3) /* DREQ state */ #define BCM2835_DMA_ISPAUSED BIT(4) /* Pause requested or not active */ #define BCM2835_DMA_ISHELD BIT(5) /* Is held by DREQ flow control */ -#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last + * AXI-write to ack + */ +#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_PRIORITY(x) ((x & 15) << 16) /* AXI priority */ +#define BCM2835_DMA_PANIC_PRIORITY(x) ((x & 15) << 20) /* panic priority */ +/* current value of TI.BCM2835_DMA_WAIT_RESP */ +#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28) +#define BCM2835_DMA_DIS_DEBUG BIT(29) /* disable debug pause signal */ #define BCM2835_DMA_ABORT BIT(30) /* Stop current CB, go to next, WO */ #define BCM2835_DMA_RESET BIT(31) /* WO, self clearing */ +/* Transfer information bits - also bcm2835_cb.info field */ #define BCM2835_DMA_INT_EN BIT(0) +#define BCM2835_DMA_TDMODE BIT(1) /* 2D-Mode */ +#define BCM2835_DMA_WAIT_RESP BIT(3) /* wait for AXI-write to be acked */ #define BCM2835_DMA_D_INC BIT(4) -#define BCM2835_DMA_D_DREQ BIT(6) +#define BCM2835_DMA_D_WIDTH BIT(5) /* 128bit writes if set */ +#define BCM2835_DMA_D_DREQ BIT(6) /* enable DREQ for destination */ +#define BCM2835_DMA_D_IGNORE BIT(7) /* ignore destination writes */ #define BCM2835_DMA_S_INC BIT(8) -#define BCM2835_DMA_S_DREQ BIT(10) - -#define BCM2835_DMA_PER_MAP(x) ((x) << 16) +#define BCM2835_DMA_S_WIDTH BIT(9) /* 128bit writes if set */ +#define BCM2835_DMA_S_DREQ BIT(10) /* enable SREQ for source */ +#define BCM2835_DMA_S_IGNORE BIT(11) /* ignore source reads - read 0 */ +#define BCM2835_DMA_BURST_LENGTH(x) ((x & 15) << 12) +#define BCM2835_DMA_PER_MAP(x) ((x & 31) << 16) /* REQ source */ +#define BCM2835_DMA_WAIT(x) ((x & 31) << 21) /* add DMA-wait cycles */ +#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */ + +/* debug register bits */ +#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR BIT(0) +#define BCM2835_DMA_DEBUG_FIFO_ERR BIT(1) +#define BCM2835_DMA_DEBUG_READ_ERR BIT(2) +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4 +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4 +#define BCM2835_DMA_DEBUG_ID_SHIFT 16 +#define BCM2835_DMA_DEBUG_ID_BITS 9 +#define BCM2835_DMA_DEBUG_STATE_SHIFT 16 +#define BCM2835_DMA_DEBUG_STATE_BITS 9 +#define BCM2835_DMA_DEBUG_VERSION_SHIFT 25 +#define BCM2835_DMA_DEBUG_VERSION_BITS 3 +#define BCM2835_DMA_DEBUG_LITE BIT(28) + +/* shared registers for all dma channels */ +#define BCM2835_DMA_INT_STATUS 0xfe0 +#define BCM2835_DMA_ENABLE 0xff0 #define BCM2835_DMA_DATA_TYPE_S8 1 #define BCM2835_DMA_DATA_TYPE_S16 2 #define BCM2835_DMA_DATA_TYPE_S32 4 #define BCM2835_DMA_DATA_TYPE_S128 16 -#define BCM2835_DMA_BULK_MASK BIT(0) -#define BCM2835_DMA_FIQ_MASK (BIT(2) | BIT(3)) - /* Valid only for channels 0 - 14, 15 has its own base address */ #define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) +/* the max dma length for different channels */ +#define MAX_DMA_LEN SZ_1G +#define MAX_LITE_DMA_LEN (SZ_64K - 4) + +static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c) +{ + /* lite and normal channels have different max frame length */ + return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN; +} + +/* how many frames of max_len size do we need to transfer len bytes */ +static inline size_t bcm2835_dma_frames_for_length(size_t len, + size_t max_len) +{ + return DIV_ROUND_UP(len, max_len); +} + static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d) { return container_of(d, struct bcm2835_dmadev, ddev); @@ -146,19 +208,209 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc( return container_of(t, struct bcm2835_desc, vd.tx); } -static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc) { - struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd); - int i; + size_t i; for (i = 0; i < desc->frames; i++) dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb, desc->cb_list[i].paddr); - kfree(desc->cb_list); kfree(desc); } +static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +{ + bcm2835_dma_free_cb_chain( + container_of(vd, struct bcm2835_desc, vd)); +} + +static void bcm2835_dma_create_cb_set_length( + struct bcm2835_chan *chan, + struct bcm2835_dma_cb *control_block, + size_t len, + size_t period_len, + size_t *total_len, + u32 finalextrainfo) +{ + size_t max_len = bcm2835_dma_max_frame_length(chan); + + /* set the length taking lite-channel limitations into account */ + control_block->length = min_t(u32, len, max_len); + + /* finished if we have no period_length */ + if (!period_len) + return; + + /* + * period_len means: that we need to generate + * transfers that are terminating at every + * multiple of period_len - this is typically + * used to set the interrupt flag in info + * which is required during cyclic transfers + */ + + /* have we filled in period_length yet? */ + if (*total_len + control_block->length < period_len) + return; + + /* calculate the length that remains to reach period_length */ + control_block->length = period_len - *total_len; + + /* reset total_length for next period */ + *total_len = 0; + + /* add extrainfo bits in info */ + control_block->info |= finalextrainfo; +} + +static inline size_t bcm2835_dma_count_frames_for_sg( + struct bcm2835_chan *c, + struct scatterlist *sgl, + unsigned int sg_len) +{ + size_t frames = 0; + struct scatterlist *sgent; + unsigned int i; + size_t plength = bcm2835_dma_max_frame_length(c); + + for_each_sg(sgl, sgent, sg_len, i) + frames += bcm2835_dma_frames_for_length( + sg_dma_len(sgent), plength); + + return frames; +} + +/** + * bcm2835_dma_create_cb_chain - create a control block and fills data in + * + * @chan: the @dma_chan for which we run this + * @direction: the direction in which we transfer + * @cyclic: it is a cyclic transfer + * @info: the default info bits to apply per controlblock + * @frames: number of controlblocks to allocate + * @src: the src address to assign (if the S_INC bit is set + * in @info, then it gets incremented) + * @dst: the dst address to assign (if the D_INC bit is set + * in @info, then it gets incremented) + * @buf_len: the full buffer length (may also be 0) + * @period_len: the period length when to apply @finalextrainfo + * in addition to the last transfer + * this will also break some control-blocks early + * @finalextrainfo: additional bits in last controlblock + * (or when period_len is reached in case of cyclic) + * @gfp: the GFP flag to use for allocation + */ +static struct bcm2835_desc *bcm2835_dma_create_cb_chain( + struct dma_chan *chan, enum dma_transfer_direction direction, + bool cyclic, u32 info, u32 finalextrainfo, size_t frames, + dma_addr_t src, dma_addr_t dst, size_t buf_len, + size_t period_len, gfp_t gfp) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t len = buf_len, total_len; + size_t frame; + struct bcm2835_desc *d; + struct bcm2835_cb_entry *cb_entry; + struct bcm2835_dma_cb *control_block; + + if (!frames) + return NULL; + + /* allocate and setup the descriptor. */ + d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry), + gfp); + if (!d) + return NULL; + + d->c = c; + d->dir = direction; + d->cyclic = cyclic; + + /* + * Iterate over all frames, create a control block + * for each frame and link them together. + */ + for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) { + cb_entry = &d->cb_list[frame]; + cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp, + &cb_entry->paddr); + if (!cb_entry->cb) + goto error_cb; + + /* fill in the control block */ + control_block = cb_entry->cb; + control_block->info = info; + control_block->src = src; + control_block->dst = dst; + control_block->stride = 0; + control_block->next = 0; + /* set up length in control_block if requested */ + if (buf_len) { + /* calculate length honoring period_length */ + bcm2835_dma_create_cb_set_length( + c, control_block, + len, period_len, &total_len, + cyclic ? finalextrainfo : 0); + + /* calculate new remaining length */ + len -= control_block->length; + } + + /* link this the last controlblock */ + if (frame) + d->cb_list[frame - 1].cb->next = cb_entry->paddr; + + /* update src and dst and length */ + if (src && (info & BCM2835_DMA_S_INC)) + src += control_block->length; + if (dst && (info & BCM2835_DMA_D_INC)) + dst += control_block->length; + + /* Length of total transfer */ + d->size += control_block->length; + } + + /* the last frame requires extra flags */ + d->cb_list[d->frames - 1].cb->info |= finalextrainfo; + + /* detect a size missmatch */ + if (buf_len && (d->size != buf_len)) + goto error_cb; + + return d; +error_cb: + bcm2835_dma_free_cb_chain(d); + + return NULL; +} + +static void bcm2835_dma_fill_cb_chain_with_sg( + struct dma_chan *chan, + enum dma_transfer_direction direction, + struct bcm2835_cb_entry *cb, + struct scatterlist *sgl, + unsigned int sg_len) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t max_len = bcm2835_dma_max_frame_length(c); + unsigned int i, len; + dma_addr_t addr; + struct scatterlist *sgent; + + for_each_sg(sgl, sgent, sg_len, i) { + for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent); + len > 0; + addr += cb->cb->length, len -= cb->cb->length, cb++) { + if (direction == DMA_DEV_TO_MEM) + cb->cb->dst = addr; + else + cb->cb->src = addr; + cb->cb->length = min(len, max_len); + } + } +} + static int bcm2835_dma_abort(void __iomem *chan_base) { unsigned long cs; @@ -218,6 +470,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) struct bcm2835_desc *d; unsigned long flags; + /* check the shared interrupt */ + if (c->irq_flags & IRQF_SHARED) { + /* check if the interrupt is enabled */ + flags = readl(c->chan_base + BCM2835_DMA_CS); + /* if not set then we are not the reason for the irq */ + if (!(flags & BCM2835_DMA_INT)) + return IRQ_NONE; + } + spin_lock_irqsave(&c->vc.lock, flags); /* Acknowledge interrupt */ @@ -226,12 +487,18 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) d = c->desc; if (d) { - /* TODO Only works for cyclic DMA */ - vchan_cyclic_callback(&d->vd); - } + if (d->cyclic) { + /* call the cyclic callback */ + vchan_cyclic_callback(&d->vd); - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); + /* Keep the DMA engine running */ + writel(BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); + } else { + vchan_cookie_complete(&c->desc->vd); + bcm2835_dma_start_desc(c); + } + } spin_unlock_irqrestore(&c->vc.lock, flags); @@ -252,8 +519,8 @@ static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan) return -ENOMEM; } - return request_irq(c->irq_number, - bcm2835_dma_callback, 0, "DMA IRQ", c); + return request_irq(c->irq_number, bcm2835_dma_callback, + c->irq_flags, "DMA IRQ", c); } static void bcm2835_dma_free_chan_resources(struct dma_chan *chan) @@ -339,8 +606,6 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); unsigned long flags; - c->cyclic = true; /* Nothing else is implemented */ - spin_lock_irqsave(&c->vc.lock, flags); if (vchan_issue_pending(&c->vc) && !c->desc) bcm2835_dma_start_desc(c); @@ -348,122 +613,160 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&c->vc.lock, flags); } -static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( - struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, - size_t period_len, enum dma_transfer_direction direction, - unsigned long flags) +struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); - enum dma_slave_buswidth dev_width; struct bcm2835_desc *d; - dma_addr_t dev_addr; - unsigned int es, sync_type; - unsigned int frame; - int i; + u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC; + u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* if src, dst or len is not given return with an error */ + if (!src || !dst || !len) + return NULL; + + /* calculate number of frames */ + frames = bcm2835_dma_frames_for_length(len, max_len); + + /* allocate the CB chain - this also fills in the pointers */ + d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false, + info, extra, frames, + src, dst, len, 0, GFP_KERNEL); + if (!d) + return NULL; + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( + struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src = 0, dst = 0; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t frames; - /* Grab configuration */ if (!is_slave_direction(direction)) { - dev_err(chan->device->dev, "%s: bad direction?\n", __func__); + dev_err(chan->device->dev, + "%s: bad direction?\n", __func__); return NULL; } + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); + if (direction == DMA_DEV_TO_MEM) { - dev_addr = c->cfg.src_addr; - dev_width = c->cfg.src_addr_width; - sync_type = BCM2835_DMA_S_DREQ; + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; } else { - dev_addr = c->cfg.dst_addr; - dev_width = c->cfg.dst_addr_width; - sync_type = BCM2835_DMA_D_DREQ; + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; } - /* Bus width translates to the element size (ES) */ - switch (dev_width) { - case DMA_SLAVE_BUSWIDTH_4_BYTES: - es = BCM2835_DMA_DATA_TYPE_S32; - break; - default: - return NULL; - } + /* count frames in sg list */ + frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len); - /* Now allocate and setup the descriptor. */ - d = kzalloc(sizeof(*d), GFP_NOWAIT); + /* allocate the CB chain */ + d = bcm2835_dma_create_cb_chain(chan, direction, false, + info, extra, + frames, src, dst, 0, 0, + GFP_KERNEL); if (!d) return NULL; - d->c = c; - d->dir = direction; - d->frames = buf_len / period_len; + /* fill in frames with scatterlist pointers */ + bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list, + sgl, sg_len); - d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); - if (!d->cb_list) { - kfree(d); + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src, dst; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* Grab configuration */ + if (!is_slave_direction(direction)) { + dev_err(chan->device->dev, "%s: bad direction?\n", __func__); return NULL; } - /* Allocate memory for control blocks */ - for (i = 0; i < d->frames; i++) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - cb_entry->cb = dma_pool_zalloc(c->cb_pool, GFP_ATOMIC, - &cb_entry->paddr); - if (!cb_entry->cb) - goto error_cb; + if (!buf_len) { + dev_err(chan->device->dev, + "%s: bad buffer length (= 0)\n", __func__); + return NULL; } /* - * Iterate over all frames, create a control block - * for each frame and link them together. + * warn if buf_len is not a multiple of period_len - this may leed + * to unexpected latencies for interrupts and thus audiable clicks */ - for (frame = 0; frame < d->frames; frame++) { - struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb; - - /* Setup adresses */ - if (d->dir == DMA_DEV_TO_MEM) { - control_block->info = BCM2835_DMA_D_INC; - control_block->src = dev_addr; - control_block->dst = buf_addr + frame * period_len; - } else { - control_block->info = BCM2835_DMA_S_INC; - control_block->src = buf_addr + frame * period_len; - control_block->dst = dev_addr; - } + if (buf_len % period_len) + dev_warn_once(chan->device->dev, + "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n", + __func__, buf_len, period_len); - /* Enable interrupt */ - control_block->info |= BCM2835_DMA_INT_EN; + /* Setup DREQ channel */ + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); - /* Setup synchronization */ - if (sync_type != 0) - control_block->info |= sync_type; + if (direction == DMA_DEV_TO_MEM) { + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + dst = buf_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; + } else { + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + src = buf_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; + } - /* Setup DREQ channel */ - if (c->dreq != 0) - control_block->info |= - BCM2835_DMA_PER_MAP(c->dreq); + /* calculate number of frames */ + frames = /* number of periods */ + DIV_ROUND_UP(buf_len, period_len) * + /* number of frames per period */ + bcm2835_dma_frames_for_length(period_len, max_len); - /* Length of a frame */ - control_block->length = period_len; - d->size += control_block->length; + /* + * allocate the CB chain + * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine + * implementation calls prep_dma_cyclic with interrupts disabled. + */ + d = bcm2835_dma_create_cb_chain(chan, direction, true, + info, extra, + frames, src, dst, buf_len, + period_len, GFP_NOWAIT); + if (!d) + return NULL; - /* - * Next block is the next frame. - * This DMA engine driver currently only supports cyclic DMA. - * Therefore, wrap around at number of frames. - */ - control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr; - } + /* wrap around into a loop */ + d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr; return vchan_tx_prep(&c->vc, &d->vd, flags); -error_cb: - i--; - for (; i >= 0; i--) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - - dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr); - } - - kfree(d->cb_list); - kfree(d); - return NULL; } static int bcm2835_dma_slave_config(struct dma_chan *chan, @@ -529,7 +832,8 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) return 0; } -static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) +static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, + int irq, unsigned int irq_flags) { struct bcm2835_chan *c; @@ -544,6 +848,12 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id); c->ch = chan_id; c->irq_number = irq; + c->irq_flags = irq_flags; + + /* check in DEBUG register if this is a LITE channel */ + if (readl(c->chan_base + BCM2835_DMA_DEBUG) & + BCM2835_DMA_DEBUG_LITE) + c->is_lite_channel = true; return 0; } @@ -587,9 +897,11 @@ static int bcm2835_dma_probe(struct platform_device *pdev) struct resource *res; void __iomem *base; int rc; - int i; - int irq; + int i, j; + int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1]; + int irq_flags; uint32_t chans_available; + char chan_name[BCM2835_DMA_CHAN_NAME_SIZE]; if (!pdev->dev.dma_mask) pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; @@ -615,16 +927,22 @@ static int bcm2835_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources; od->ddev.device_tx_status = bcm2835_dma_tx_status; od->ddev.device_issue_pending = bcm2835_dma_issue_pending; od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; + od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; + od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy; od->ddev.device_config = bcm2835_dma_slave_config; od->ddev.device_terminate_all = bcm2835_dma_terminate_all; od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); - od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); + od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; od->ddev.dev = &pdev->dev; INIT_LIST_HEAD(&od->ddev.channels); spin_lock_init(&od->lock); @@ -640,22 +958,48 @@ static int bcm2835_dma_probe(struct platform_device *pdev) goto err_no_dma; } - /* - * Do not use the FIQ and BULK channels, - * because they are used by the GPU. - */ - chans_available &= ~(BCM2835_DMA_FIQ_MASK | BCM2835_DMA_BULK_MASK); + /* get irqs for each channel that we support */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip masked out channels */ + if (!(chans_available & (1 << i))) { + irq[i] = -1; + continue; + } - for (i = 0; i < pdev->num_resources; i++) { - irq = platform_get_irq(pdev, i); - if (irq < 0) - break; + /* get the named irq */ + snprintf(chan_name, sizeof(chan_name), "dma%i", i); + irq[i] = platform_get_irq_byname(pdev, chan_name); + if (irq[i] >= 0) + continue; - if (chans_available & (1 << i)) { - rc = bcm2835_dma_chan_init(od, i, irq); - if (rc) - goto err_no_dma; - } + /* legacy device tree case handling */ + dev_warn_once(&pdev->dev, + "missing interrupt-names property in device tree - legacy interpretation is used\n"); + /* + * in case of channel >= 11 + * use the 11th interrupt and that is shared + */ + irq[i] = platform_get_irq(pdev, i < 11 ? i : 11); + } + + /* get irqs for each channel */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip channels without irq */ + if (irq[i] < 0) + continue; + + /* check if there are other channels that also use this irq */ + irq_flags = 0; + for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++) + if ((i != j) && (irq[j] == irq[i])) { + irq_flags = IRQF_SHARED; + break; + } + + /* initialize the channel */ + rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags); + if (rc) + goto err_no_dma; } dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 0cb259c59916..8c9f45fd55fc 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -289,7 +289,7 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) do { status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); if (time_after_eq(jiffies, dma_sync_wait_timeout)) { - pr_err("%s: timeout!\n", __func__); + dev_err(chan->device->dev, "%s: timeout!\n", __func__); return DMA_ERROR; } if (status != DMA_IN_PROGRESS) @@ -482,7 +482,8 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) device = chan->device; /* check if the channel supports slave transactions */ - if (!test_bit(DMA_SLAVE, device->cap_mask.bits)) + if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) || + test_bit(DMA_CYCLIC, device->cap_mask.bits))) return -ENXIO; /* @@ -518,7 +519,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, struct dma_chan *chan; if (mask && !__dma_device_satisfies_mask(dev, mask)) { - pr_debug("%s: wrong capabilities\n", __func__); + dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__); return NULL; } /* devices with multiple channels need special handling as we need to @@ -533,12 +534,12 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, list_for_each_entry(chan, &dev->channels, device_node) { if (chan->client_count) { - pr_debug("%s: %s busy\n", + dev_dbg(dev->dev, "%s: %s busy\n", __func__, dma_chan_name(chan)); continue; } if (fn && !fn(chan, fn_param)) { - pr_debug("%s: %s filter said false\n", + dev_dbg(dev->dev, "%s: %s filter said false\n", __func__, dma_chan_name(chan)); continue; } @@ -567,11 +568,12 @@ static struct dma_chan *find_candidate(struct dma_device *device, if (err) { if (err == -ENODEV) { - pr_debug("%s: %s module removed\n", __func__, - dma_chan_name(chan)); + dev_dbg(device->dev, "%s: %s module removed\n", + __func__, dma_chan_name(chan)); list_del_rcu(&device->global_node); } else - pr_debug("%s: failed to get %s: (%d)\n", + dev_dbg(device->dev, + "%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); if (--device->privatecnt == 0) @@ -602,7 +604,8 @@ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) device->privatecnt++; err = dma_chan_get(chan); if (err) { - pr_debug("%s: failed to get %s: (%d)\n", + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); chan = NULL; if (--device->privatecnt == 0) @@ -814,8 +817,9 @@ void dmaengine_get(void) list_del_rcu(&device->global_node); break; } else if (err) - pr_debug("%s: failed to get %s: (%d)\n", - __func__, dma_chan_name(chan), err); + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); } } @@ -862,12 +866,12 @@ static bool device_has_all_tx_types(struct dma_device *device) return false; #endif - #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_MEMCPY) if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) return false; #endif - #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_XOR) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; @@ -877,7 +881,7 @@ static bool device_has_all_tx_types(struct dma_device *device) #endif #endif - #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_PQ) if (!dma_has_cap(DMA_PQ, device->cap_mask)) return false; @@ -1222,8 +1226,9 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) while (tx->cookie == -EBUSY) { if (time_after_eq(jiffies, dma_sync_wait_timeout)) { - pr_err("%s timeout waiting for descriptor submission\n", - __func__); + dev_err(tx->chan->device->dev, + "%s timeout waiting for descriptor submission\n", + __func__); return DMA_ERROR; } cpu_relax(); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 5ad0ec1f0e29..edf053f73a49 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -45,22 +45,19 @@ DW_DMA_MSIZE_16; \ u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \ DW_DMA_MSIZE_16; \ + u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \ + _dwc->p_master : _dwc->m_master; \ + u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \ + _dwc->p_master : _dwc->m_master; \ \ (DWC_CTLL_DST_MSIZE(_dmsize) \ | DWC_CTLL_SRC_MSIZE(_smsize) \ | DWC_CTLL_LLP_D_EN \ | DWC_CTLL_LLP_S_EN \ - | DWC_CTLL_DMS(_dwc->dst_master) \ - | DWC_CTLL_SMS(_dwc->src_master)); \ + | DWC_CTLL_DMS(_dms) \ + | DWC_CTLL_SMS(_sms)); \ }) -/* - * Number of descriptors to allocate for each channel. This should be - * made configurable somehow; preferably, the clients (at least the - * ones using slave transfers) should be able to give us a hint. - */ -#define NR_DESCS_PER_CHANNEL 64 - /* The set of bus widths supported by the DMA controller */ #define DW_DMA_BUSWIDTHS \ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ @@ -80,76 +77,78 @@ static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) return to_dw_desc(dwc->active_list.next); } -static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) { - struct dw_desc *desc, *_desc; - struct dw_desc *ret = NULL; - unsigned int i = 0; - unsigned long flags; + struct dw_desc *desc = txd_to_dw_desc(tx); + struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; spin_lock_irqsave(&dwc->lock, flags); - list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { - i++; - if (async_tx_test_ack(&desc->txd)) { - list_del(&desc->desc_node); - ret = desc; - break; - } - dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); - } + cookie = dma_cookie_assign(tx); + + /* + * REVISIT: We should attempt to chain as many descriptors as + * possible, perhaps even appending to those already submitted + * for DMA. But this is hard to do in a race-free manner. + */ + + list_add_tail(&desc->desc_node, &dwc->queue); spin_unlock_irqrestore(&dwc->lock, flags); + dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", + __func__, desc->txd.cookie); - dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); + return cookie; +} - return ret; +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *desc; + dma_addr_t phys; + + desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys); + if (!desc) + return NULL; + + dwc->descs_allocated++; + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, &dwc->chan); + desc->txd.tx_submit = dwc_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = phys; + return desc; } -/* - * Move a descriptor, including any children, to the free list. - * `desc' must not be on any lists. - */ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) { - unsigned long flags; + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *child, *_next; - if (desc) { - struct dw_desc *child; + if (unlikely(!desc)) + return; - spin_lock_irqsave(&dwc->lock, flags); - list_for_each_entry(child, &desc->tx_list, desc_node) - dev_vdbg(chan2dev(&dwc->chan), - "moving child desc %p to freelist\n", - child); - list_splice_init(&desc->tx_list, &dwc->free_list); - dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); - list_add(&desc->desc_node, &dwc->free_list); - spin_unlock_irqrestore(&dwc->lock, flags); + list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) { + list_del(&child->desc_node); + dma_pool_free(dw->desc_pool, child, child->txd.phys); + dwc->descs_allocated--; } + + dma_pool_free(dw->desc_pool, desc, desc->txd.phys); + dwc->descs_allocated--; } static void dwc_initialize(struct dw_dma_chan *dwc) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); - struct dw_dma_slave *dws = dwc->chan.private; u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); - if (dwc->initialized == true) + if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags)) return; - if (dws) { - /* - * We need controller-specific data to set up slave - * transfers. - */ - BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - - cfghi |= DWC_CFGH_DST_PER(dws->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dws->src_id); - } else { - cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); - } + cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); channel_writel(dwc, CFG_LO, cfglo); channel_writel(dwc, CFG_HI, cfghi); @@ -158,26 +157,11 @@ static void dwc_initialize(struct dw_dma_chan *dwc) channel_set_bit(dw, MASK.XFER, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); - dwc->initialized = true; + set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); } /*----------------------------------------------------------------------*/ -static inline unsigned int dwc_fast_ffs(unsigned long long v) -{ - /* - * We can be a lot more clever here, but this should take care - * of the most common optimization. - */ - if (!(v & 7)) - return 3; - else if (!(v & 3)) - return 2; - else if (!(v & 1)) - return 1; - return 0; -} - static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc) { dev_err(chan2dev(&dwc->chan), @@ -209,12 +193,12 @@ static inline void dwc_do_single_block(struct dw_dma_chan *dwc, * Software emulation of LLP mode relies on interrupts to continue * multi block transfer. */ - ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN; + ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN; - channel_writel(dwc, SAR, desc->lli.sar); - channel_writel(dwc, DAR, desc->lli.dar); + channel_writel(dwc, SAR, lli_read(desc, sar)); + channel_writel(dwc, DAR, lli_read(desc, dar)); channel_writel(dwc, CTL_LO, ctllo); - channel_writel(dwc, CTL_HI, desc->lli.ctlhi); + channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi)); channel_set_bit(dw, CH_EN, dwc->mask); /* Move pointer to next descriptor */ @@ -225,6 +209,7 @@ static inline void dwc_do_single_block(struct dw_dma_chan *dwc, static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u8 lms = DWC_LLP_LMS(dwc->m_master); unsigned long was_soft_llp; /* ASSERT: channel is idle */ @@ -249,7 +234,7 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) dwc_initialize(dwc); - dwc->residue = first->total_len; + first->residue = first->total_len; dwc->tx_node_active = &first->tx_list; /* Submit first block */ @@ -260,9 +245,8 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) dwc_initialize(dwc); - channel_writel(dwc, LLP, first->txd.phys); - channel_writel(dwc, CTL_LO, - DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, LLP, first->txd.phys | lms); + channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); channel_writel(dwc, CTL_HI, 0); channel_set_bit(dw, CH_EN, dwc->mask); } @@ -305,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, list_for_each_entry(child, &desc->tx_list, desc_node) async_tx_ack(&child->txd); async_tx_ack(&desc->txd); - - list_splice_init(&desc->tx_list, &dwc->free_list); - list_move(&desc->desc_node, &dwc->free_list); - - dma_descriptor_unmap(txd); + dwc_desc_put(dwc, desc); spin_unlock_irqrestore(&dwc->lock, flags); if (callback) @@ -380,11 +360,11 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) head = &desc->tx_list; if (active != head) { - /* Update desc to reflect last sent one */ - if (active != head->next) - desc = to_dw_desc(active->prev); - - dwc->residue -= desc->len; + /* Update residue to reflect last sent descriptor */ + if (active == head->next) + desc->residue -= desc->len; + else + desc->residue -= to_dw_desc(active->prev)->len; child = to_dw_desc(active); @@ -399,8 +379,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags); } - dwc->residue = 0; - spin_unlock_irqrestore(&dwc->lock, flags); dwc_complete_all(dw, dwc); @@ -408,7 +386,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) } if (list_empty(&dwc->active_list)) { - dwc->residue = 0; spin_unlock_irqrestore(&dwc->lock, flags); return; } @@ -423,31 +400,31 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { /* Initial residue value */ - dwc->residue = desc->total_len; + desc->residue = desc->total_len; /* Check first descriptors addr */ - if (desc->txd.phys == llp) { + if (desc->txd.phys == DWC_LLP_LOC(llp)) { spin_unlock_irqrestore(&dwc->lock, flags); return; } /* Check first descriptors llp */ - if (desc->lli.llp == llp) { + if (lli_read(desc, llp) == llp) { /* This one is currently in progress */ - dwc->residue -= dwc_get_sent(dwc); + desc->residue -= dwc_get_sent(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return; } - dwc->residue -= desc->len; + desc->residue -= desc->len; list_for_each_entry(child, &desc->tx_list, desc_node) { - if (child->lli.llp == llp) { + if (lli_read(child, llp) == llp) { /* Currently in progress */ - dwc->residue -= dwc_get_sent(dwc); + desc->residue -= dwc_get_sent(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return; } - dwc->residue -= child->len; + desc->residue -= child->len; } /* @@ -469,10 +446,14 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) spin_unlock_irqrestore(&dwc->lock, flags); } -static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) +static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc) { dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", - lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo); + lli_read(desc, sar), + lli_read(desc, dar), + lli_read(desc, llp), + lli_read(desc, ctlhi), + lli_read(desc, ctllo)); } static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) @@ -508,9 +489,9 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) */ dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n" " cookie: %d\n", bad_desc->txd.cookie); - dwc_dump_lli(dwc, &bad_desc->lli); + dwc_dump_lli(dwc, bad_desc); list_for_each_entry(child, &bad_desc->tx_list, desc_node) - dwc_dump_lli(dwc, &child->lli); + dwc_dump_lli(dwc, child); spin_unlock_irqrestore(&dwc->lock, flags); @@ -561,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, */ if (unlikely(status_err & dwc->mask) || unlikely(status_xfer & dwc->mask)) { - int i; + unsigned int i; dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n", @@ -583,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, dma_writel(dw, CLEAR.XFER, dwc->mask); for (i = 0; i < dwc->cdesc->periods; i++) - dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); + dwc_dump_lli(dwc, dwc->cdesc->desc[i]); spin_unlock_irqrestore(&dwc->lock, flags); } @@ -601,7 +582,7 @@ static void dw_dma_tasklet(unsigned long data) u32 status_block; u32 status_xfer; u32 status_err; - int i; + unsigned int i; status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); @@ -670,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /*----------------------------------------------------------------------*/ -static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct dw_desc *desc = txd_to_dw_desc(tx); - struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); - dma_cookie_t cookie; - unsigned long flags; - - spin_lock_irqsave(&dwc->lock, flags); - cookie = dma_cookie_assign(tx); - - /* - * REVISIT: We should attempt to chain as many descriptors as - * possible, perhaps even appending to those already submitted - * for DMA. But this is hard to do in a race-free manner. - */ - - dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie); - list_add_tail(&desc->desc_node, &dwc->queue); - - spin_unlock_irqrestore(&dwc->lock, flags); - - return cookie; -} - static struct dma_async_tx_descriptor * dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -705,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, struct dw_desc *prev; size_t xfer_count; size_t offset; + u8 m_master = dwc->m_master; unsigned int src_width; unsigned int dst_width; - unsigned int data_width; + unsigned int data_width = dw->pdata->data_width[m_master]; u32 ctllo; + u8 lms = DWC_LLP_LMS(m_master); dev_vdbg(chan2dev(chan), "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__, @@ -721,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dwc->direction = DMA_MEM_TO_MEM; - data_width = min_t(unsigned int, dw->data_width[dwc->src_master], - dw->data_width[dwc->dst_master]); - - src_width = dst_width = min_t(unsigned int, data_width, - dwc_fast_ffs(src | dest | len)); + src_width = dst_width = __ffs(data_width | src | dest | len); ctllo = DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_DST_WIDTH(dst_width) @@ -743,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (!desc) goto err_desc_get; - desc->lli.sar = src + offset; - desc->lli.dar = dest + offset; - desc->lli.ctllo = ctllo; - desc->lli.ctlhi = xfer_count; + lli_write(desc, sar, src + offset); + lli_write(desc, dar, dest + offset); + lli_write(desc, ctllo, ctllo); + lli_write(desc, ctlhi, xfer_count); desc->len = xfer_count << src_width; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } if (flags & DMA_PREP_INTERRUPT) /* Trigger interrupt after last block */ - prev->lli.ctllo |= DWC_CTLL_INT_EN; + lli_set(prev, ctllo, DWC_CTLL_INT_EN); prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); first->txd.flags = flags; first->total_len = len; @@ -785,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct dw_desc *prev; struct dw_desc *first; u32 ctllo; + u8 m_master = dwc->m_master; + u8 lms = DWC_LLP_LMS(m_master); dma_addr_t reg; unsigned int reg_width; unsigned int mem_width; - unsigned int data_width; + unsigned int data_width = dw->pdata->data_width[m_master]; unsigned int i; struct scatterlist *sg; size_t total_len = 0; @@ -814,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) : DWC_CTLL_FC(DW_DMA_FC_D_M2P); - data_width = dw->data_width[dwc->src_master]; - for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; u32 len, dlen, mem; @@ -823,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, mem = sg_dma_address(sg); len = sg_dma_len(sg); - mem_width = min_t(unsigned int, - data_width, dwc_fast_ffs(mem | len)); + mem_width = __ffs(data_width | mem | len); slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) goto err_desc_get; - desc->lli.sar = mem; - desc->lli.dar = reg; - desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); + lli_write(desc, sar, mem); + lli_write(desc, dar, reg); + lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); if ((len >> mem_width) > dwc->block_size) { dlen = dwc->block_size << mem_width; mem += dlen; @@ -843,15 +797,14 @@ slave_sg_todev_fill_desc: len = 0; } - desc->lli.ctlhi = dlen >> mem_width; + lli_write(desc, ctlhi, dlen >> mem_width); desc->len = dlen; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; total_len += dlen; @@ -871,8 +824,6 @@ slave_sg_todev_fill_desc: ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) : DWC_CTLL_FC(DW_DMA_FC_D_P2M); - data_width = dw->data_width[dwc->dst_master]; - for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; u32 len, dlen, mem; @@ -880,17 +831,16 @@ slave_sg_todev_fill_desc: mem = sg_dma_address(sg); len = sg_dma_len(sg); - mem_width = min_t(unsigned int, - data_width, dwc_fast_ffs(mem | len)); + mem_width = __ffs(data_width | mem | len); slave_sg_fromdev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) goto err_desc_get; - desc->lli.sar = reg; - desc->lli.dar = mem; - desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); + lli_write(desc, sar, reg); + lli_write(desc, dar, mem); + lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); if ((len >> reg_width) > dwc->block_size) { dlen = dwc->block_size << reg_width; mem += dlen; @@ -899,15 +849,14 @@ slave_sg_fromdev_fill_desc: dlen = len; len = 0; } - desc->lli.ctlhi = dlen >> reg_width; + lli_write(desc, ctlhi, dlen >> reg_width); desc->len = dlen; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; total_len += dlen; @@ -922,9 +871,10 @@ slave_sg_fromdev_fill_desc: if (flags & DMA_PREP_INTERRUPT) /* Trigger interrupt after last block */ - prev->lli.ctllo |= DWC_CTLL_INT_EN; + lli_set(prev, ctllo, DWC_CTLL_INT_EN); prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); first->total_len = total_len; return &first->txd; @@ -941,7 +891,7 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma_slave *dws = param; - if (!dws || dws->dma_dev != chan->device->dev) + if (dws->dma_dev != chan->device->dev) return false; /* We have to copy data since dws can be temporary storage */ @@ -949,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) dwc->src_id = dws->src_id; dwc->dst_id = dws->dst_id; - dwc->src_master = dws->src_master; - dwc->dst_master = dws->dst_master; + dwc->m_master = dws->m_master; + dwc->p_master = dws->p_master; return true; } @@ -1003,7 +953,7 @@ static int dwc_pause(struct dma_chan *chan) while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) udelay(2); - dwc->paused = true; + set_bit(DW_DMA_IS_PAUSED, &dwc->flags); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1016,7 +966,7 @@ static inline void dwc_chan_resume(struct dw_dma_chan *dwc) channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); - dwc->paused = false; + clear_bit(DW_DMA_IS_PAUSED, &dwc->flags); } static int dwc_resume(struct dma_chan *chan) @@ -1024,12 +974,10 @@ static int dwc_resume(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); unsigned long flags; - if (!dwc->paused) - return 0; - spin_lock_irqsave(&dwc->lock, flags); - dwc_chan_resume(dwc); + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags)) + dwc_chan_resume(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1065,16 +1013,37 @@ static int dwc_terminate_all(struct dma_chan *chan) return 0; } -static inline u32 dwc_get_residue(struct dw_dma_chan *dwc) +static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c) +{ + struct dw_desc *desc; + + list_for_each_entry(desc, &dwc->active_list, desc_node) + if (desc->txd.cookie == c) + return desc; + + return NULL; +} + +static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie) { + struct dw_desc *desc; unsigned long flags; u32 residue; spin_lock_irqsave(&dwc->lock, flags); - residue = dwc->residue; - if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) - residue -= dwc_get_sent(dwc); + desc = dwc_find_desc(dwc, cookie); + if (desc) { + if (desc == dwc_first_active(dwc)) { + residue = desc->residue; + if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) + residue -= dwc_get_sent(dwc); + } else { + residue = desc->total_len; + } + } else { + residue = 0; + } spin_unlock_irqrestore(&dwc->lock, flags); return residue; @@ -1095,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan, dwc_scan_descriptors(to_dw_dma(chan->device), dwc); ret = dma_cookie_status(chan, cookie, txstate); - if (ret != DMA_COMPLETE) - dma_set_residue(txstate, dwc_get_residue(dwc)); + if (ret == DMA_COMPLETE) + return ret; - if (dwc->paused && ret == DMA_IN_PROGRESS) + dma_set_residue(txstate, dwc_get_residue(dwc, cookie)); + + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS) return DMA_PAUSED; return ret; @@ -1119,7 +1090,7 @@ static void dwc_issue_pending(struct dma_chan *chan) static void dw_dma_off(struct dw_dma *dw) { - int i; + unsigned int i; dma_writel(dw, CFG, 0); @@ -1133,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw) cpu_relax(); for (i = 0; i < dw->dma.chancnt; i++) - dw->chan[i].initialized = false; + clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags); } static void dw_dma_on(struct dw_dma *dw) @@ -1145,9 +1116,6 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - struct dw_desc *desc; - int i; - unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1165,53 +1133,26 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * doesn't mean what you think it means), and status writeback. */ + /* + * We need controller-specific data to set up slave transfers. + */ + if (chan->private && !dw_dma_filter(chan, chan->private)) { + dev_warn(chan2dev(chan), "Wrong controller-specific data\n"); + return -EINVAL; + } + /* Enable controller here if needed */ if (!dw->in_use) dw_dma_on(dw); dw->in_use |= dwc->mask; - spin_lock_irqsave(&dwc->lock, flags); - i = dwc->descs_allocated; - while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { - dma_addr_t phys; - - spin_unlock_irqrestore(&dwc->lock, flags); - - desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys); - if (!desc) - goto err_desc_alloc; - - memset(desc, 0, sizeof(struct dw_desc)); - - INIT_LIST_HEAD(&desc->tx_list); - dma_async_tx_descriptor_init(&desc->txd, chan); - desc->txd.tx_submit = dwc_tx_submit; - desc->txd.flags = DMA_CTRL_ACK; - desc->txd.phys = phys; - - dwc_desc_put(dwc, desc); - - spin_lock_irqsave(&dwc->lock, flags); - i = ++dwc->descs_allocated; - } - - spin_unlock_irqrestore(&dwc->lock, flags); - - dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i); - - return i; - -err_desc_alloc: - dev_info(chan2dev(chan), "only allocated %d descriptors\n", i); - - return i; + return 0; } static void dwc_free_chan_resources(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - struct dw_desc *desc, *_desc; unsigned long flags; LIST_HEAD(list); @@ -1224,9 +1165,15 @@ static void dwc_free_chan_resources(struct dma_chan *chan) BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); spin_lock_irqsave(&dwc->lock, flags); - list_splice_init(&dwc->free_list, &list); - dwc->descs_allocated = 0; - dwc->initialized = false; + + /* Clear custom channel configuration */ + dwc->src_id = 0; + dwc->dst_id = 0; + + dwc->m_master = 0; + dwc->p_master = 0; + + clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); @@ -1240,11 +1187,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) if (!dw->in_use) dw_dma_off(dw); - list_for_each_entry_safe(desc, _desc, &list, desc_node) { - dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); - dma_pool_free(dw->desc_pool, desc, desc->txd.phys); - } - dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } @@ -1322,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, struct dw_cyclic_desc *retval = NULL; struct dw_desc *desc; struct dw_desc *last = NULL; + u8 lms = DWC_LLP_LMS(dwc->m_master); unsigned long was_cyclic; unsigned int reg_width; unsigned int periods; @@ -1375,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, retval = ERR_PTR(-ENOMEM); - if (periods > NR_DESCS_PER_CHANNEL) - goto out_err; - cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL); if (!cdesc) goto out_err; @@ -1393,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, switch (direction) { case DMA_MEM_TO_DEV: - desc->lli.dar = sconfig->dst_addr; - desc->lli.sar = buf_addr + (period_len * i); - desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_FIX - | DWC_CTLL_SRC_INC - | DWC_CTLL_INT_EN); - - desc->lli.ctllo |= sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_M2P) : - DWC_CTLL_FC(DW_DMA_FC_D_M2P); + lli_write(desc, dar, sconfig->dst_addr); + lli_write(desc, sar, buf_addr + period_len * i); + lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC + | DWC_CTLL_INT_EN)); + + lli_set(desc, ctllo, sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P)); break; case DMA_DEV_TO_MEM: - desc->lli.dar = buf_addr + (period_len * i); - desc->lli.sar = sconfig->src_addr; - desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_DST_INC - | DWC_CTLL_SRC_FIX - | DWC_CTLL_INT_EN); - - desc->lli.ctllo |= sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_P2M) : - DWC_CTLL_FC(DW_DMA_FC_D_P2M); + lli_write(desc, dar, buf_addr + period_len * i); + lli_write(desc, sar, sconfig->src_addr); + lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX + | DWC_CTLL_INT_EN)); + + lli_set(desc, ctllo, sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M)); break; default: break; } - desc->lli.ctlhi = (period_len >> reg_width); + lli_write(desc, ctlhi, period_len >> reg_width); cdesc->desc[i] = desc; if (last) - last->lli.llp = desc->txd.phys; + lli_write(last, llp, desc->txd.phys | lms); last = desc; } /* Let's make a cyclic list */ - last->lli.llp = cdesc->desc[0]->txd.phys; + lli_write(last, llp, cdesc->desc[0]->txd.phys | lms); dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf %pad len %zu period %zu periods %d\n", @@ -1467,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(dwc->chan.device); struct dw_cyclic_desc *cdesc = dwc->cdesc; - int i; + unsigned int i; unsigned long flags; dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__); @@ -1491,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan *chan) kfree(cdesc->desc); kfree(cdesc); + dwc->cdesc = NULL; + clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); } EXPORT_SYMBOL(dw_dma_cyclic_free); /*----------------------------------------------------------------------*/ -int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) +int dw_dma_probe(struct dw_dma_chip *chip) { + struct dw_dma_platform_data *pdata; struct dw_dma *dw; bool autocfg = false; unsigned int dw_params; - unsigned int max_blk_size = 0; + unsigned int i; int err; - int i; dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); if (!dw) return -ENOMEM; + dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); + if (!dw->pdata) + return -ENOMEM; + dw->regs = chip->regs; chip->dw = dw; pm_runtime_get_sync(chip->dev); - if (!pdata) { - dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); + if (!chip->pdata) { + dw_params = dma_readl(dw, DW_PARAMS); dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); autocfg = dw_params >> DW_PARAMS_EN & 1; @@ -1525,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; } - pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - err = -ENOMEM; - goto err_pdata; - } + /* Reassign the platform data pointer */ + pdata = dw->pdata; /* Get hardware configuration parameters */ pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; for (i = 0; i < pdata->nr_masters; i++) { pdata->data_width[i] = - (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; + 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3); } - max_blk_size = dma_readl(dw, MAX_BLK_SIZE); + pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); /* Fill platform data with the default values */ pdata->is_private = true; pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; - } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { err = -EINVAL; goto err_pdata; + } else { + memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata)); + + /* Reassign the platform data pointer */ + pdata = dw->pdata; } dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), @@ -1557,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; } - /* Get hardware configuration parameters */ - dw->nr_masters = pdata->nr_masters; - for (i = 0; i < dw->nr_masters; i++) - dw->data_width[i] = pdata->data_width[i]; - /* Calculate all channel mask before DMA setup */ dw->all_chan_mask = (1 << pdata->nr_channels) - 1; @@ -1608,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) INIT_LIST_HEAD(&dwc->active_list); INIT_LIST_HEAD(&dwc->queue); - INIT_LIST_HEAD(&dwc->free_list); channel_clear_bit(dw, CH_EN, dwc->mask); @@ -1616,11 +1558,9 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { - unsigned int dwc_params; unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; - void __iomem *addr = chip->regs + r * sizeof(u32); - - dwc_params = dma_read_byaddr(addr, DWC_PARAMS); + void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r]; + unsigned int dwc_params = dma_readl_native(addr); dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, dwc_params); @@ -1631,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) * up to 0x0a for 4095. */ dwc->block_size = - (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1; + (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1; dwc->nollp = (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0; } else { dwc->block_size = pdata->block_size; /* Check if channel supports multi block transfer */ - channel_writel(dwc, LLP, 0xfffffffc); - dwc->nollp = - (channel_readl(dwc, LLP) & 0xfffffffc) == 0; + channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff)); + dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0; channel_writel(dwc, LLP, 0); } } diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 358f9689a3f5..0ae6c3b1d34e 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -17,8 +17,8 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { + const struct dw_dma_platform_data *pdata = (void *)pid->driver_data; struct dw_dma_chip *chip; - struct dw_dma_platform_data *pdata = (void *)pid->driver_data; int ret; ret = pcim_enable_device(pdev); @@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) chip->dev = &pdev->dev; chip->regs = pcim_iomap_table(pdev)[0]; chip->irq = pdev->irq; + chip->pdata = pdata; - ret = dw_dma_probe(chip, pdata); + ret = dw_dma_probe(chip); if (ret) return ret; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 26edbe3a27ac..5bda0eb9f393 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, slave.src_id = dma_spec->args[0]; slave.dst_id = dma_spec->args[0]; - slave.src_master = dma_spec->args[1]; - slave.dst_master = dma_spec->args[2]; + slave.m_master = dma_spec->args[1]; + slave.p_master = dma_spec->args[2]; if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS || slave.dst_id >= DW_DMA_MAX_NR_REQUESTS || - slave.src_master >= dw->nr_masters || - slave.dst_master >= dw->nr_masters)) + slave.m_master >= dw->pdata->nr_masters || + slave.p_master >= dw->pdata->nr_masters)) return NULL; dma_cap_zero(cap); @@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param) .dma_dev = dma_spec->dev, .src_id = dma_spec->slave_id, .dst_id = dma_spec->slave_id, - .src_master = 1, - .dst_master = 0, + .m_master = 0, + .p_master = 1, }; return dw_dma_filter(chan, &slave); @@ -103,6 +103,7 @@ dw_dma_parse_dt(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct dw_dma_platform_data *pdata; u32 tmp, arr[DW_DMA_MAX_NR_MASTERS]; + u32 nr_masters; u32 nr_channels; if (!np) { @@ -110,6 +111,11 @@ dw_dma_parse_dt(struct platform_device *pdev) return NULL; } + if (of_property_read_u32(np, "dma-masters", &nr_masters)) + return NULL; + if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS) + return NULL; + if (of_property_read_u32(np, "dma-channels", &nr_channels)) return NULL; @@ -117,6 +123,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!pdata) return NULL; + pdata->nr_masters = nr_masters; pdata->nr_channels = nr_channels; if (of_property_read_bool(np, "is_private")) @@ -131,17 +138,13 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!of_property_read_u32(np, "block_size", &tmp)) pdata->block_size = tmp; - if (!of_property_read_u32(np, "dma-masters", &tmp)) { - if (tmp > DW_DMA_MAX_NR_MASTERS) - return NULL; - - pdata->nr_masters = tmp; - } - - if (!of_property_read_u32_array(np, "data_width", arr, - pdata->nr_masters)) - for (tmp = 0; tmp < pdata->nr_masters; tmp++) + if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) { + for (tmp = 0; tmp < nr_masters; tmp++) pdata->data_width[tmp] = arr[tmp]; + } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) { + for (tmp = 0; tmp < nr_masters; tmp++) + pdata->data_width[tmp] = BIT(arr[tmp] & 0x07); + } return pdata; } @@ -158,7 +161,7 @@ static int dw_probe(struct platform_device *pdev) struct dw_dma_chip *chip; struct device *dev = &pdev->dev; struct resource *mem; - struct dw_dma_platform_data *pdata; + const struct dw_dma_platform_data *pdata; int err; chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); @@ -183,6 +186,7 @@ static int dw_probe(struct platform_device *pdev) pdata = dw_dma_parse_dt(pdev); chip->dev = dev; + chip->pdata = pdata; chip->clk = devm_clk_get(chip->dev, "hclk"); if (IS_ERR(chip->clk)) @@ -193,7 +197,7 @@ static int dw_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - err = dw_dma_probe(chip, pdata); + err = dw_dma_probe(chip); if (err) goto err_dw_dma_probe; diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 0a50c18d85b8..4b7bd7834046 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -114,10 +114,6 @@ struct dw_dma_regs { #define dma_writel_native writel #endif -/* To access the registers in early stage of probe */ -#define dma_read_byaddr(addr, name) \ - dma_readl_native((addr) + offsetof(struct dw_dma_regs, name)) - /* Bitfields in DW_PARAMS */ #define DW_PARAMS_NR_CHAN 8 /* number of channels */ #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */ @@ -143,6 +139,10 @@ enum dw_dma_msize { DW_DMA_MSIZE_256, }; +/* Bitfields in LLP */ +#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */ +#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */ + /* Bitfields in CTL_LO */ #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */ #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ @@ -216,6 +216,8 @@ enum dw_dma_msize { enum dw_dmac_flags { DW_DMA_IS_CYCLIC = 0, DW_DMA_IS_SOFT_LLP = 1, + DW_DMA_IS_PAUSED = 2, + DW_DMA_IS_INITIALIZED = 3, }; struct dw_dma_chan { @@ -224,8 +226,6 @@ struct dw_dma_chan { u8 mask; u8 priority; enum dma_transfer_direction direction; - bool paused; - bool initialized; /* software emulation of the LLP transfers */ struct list_head *tx_node_active; @@ -236,8 +236,6 @@ struct dw_dma_chan { unsigned long flags; struct list_head active_list; struct list_head queue; - struct list_head free_list; - u32 residue; struct dw_cyclic_desc *cdesc; unsigned int descs_allocated; @@ -249,8 +247,8 @@ struct dw_dma_chan { /* custom slave configuration */ u8 src_id; u8 dst_id; - u8 src_master; - u8 dst_master; + u8 m_master; + u8 p_master; /* configuration passed via .device_config */ struct dma_slave_config dma_sconfig; @@ -283,9 +281,8 @@ struct dw_dma { u8 all_chan_mask; u8 in_use; - /* hardware configuration */ - unsigned char nr_masters; - unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; + /* platform data */ + struct dw_dma_platform_data *pdata; }; static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) @@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) return container_of(ddev, struct dw_dma, dma); } +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO +typedef __be32 __dw32; +#else +typedef __le32 __dw32; +#endif + /* LLI == Linked List Item; a.k.a. DMA block descriptor */ struct dw_lli { /* values that are not changed by hardware */ - u32 sar; - u32 dar; - u32 llp; /* chain to next lli */ - u32 ctllo; + __dw32 sar; + __dw32 dar; + __dw32 llp; /* chain to next lli */ + __dw32 ctllo; /* values that may get written back: */ - u32 ctlhi; + __dw32 ctlhi; /* sstat and dstat can snapshot peripheral register state. * silicon config may discard either or both... */ - u32 sstat; - u32 dstat; + __dw32 sstat; + __dw32 dstat; }; struct dw_desc { /* FIRST values the hardware uses */ struct dw_lli lli; +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO +#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v)) +#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v)) +#define lli_read(d, reg) be32_to_cpu((d)->lli.reg) +#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v)) +#else +#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v)) +#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v)) +#define lli_read(d, reg) le32_to_cpu((d)->lli.reg) +#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v)) +#endif + /* THEN values for driver housekeeping */ struct list_head desc_node; struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; size_t total_len; + u32 residue; }; #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index ee3463e774f8..694c44e487ed 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1518,8 +1518,17 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) dev_vdbg(ecc->dev, "dma_ccerr_handler\n"); - if (!edma_error_pending(ecc)) + if (!edma_error_pending(ecc)) { + /* + * The registers indicate no pending error event but the irq + * handler has been called. + * Ask eDMA to re-evaluate the error registers. + */ + dev_err(ecc->dev, "%s: Error interrupt without error event!\n", + __func__); + edma_write(ecc, EDMA_EEVAL, 1); return IRQ_NONE; + } while (1) { /* Event missed register(s) */ diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index aac85c30c2cf..a8828ed639b3 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -462,13 +462,12 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan) struct fsl_desc_sw *desc; dma_addr_t pdesc; - desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); if (!desc) { chan_dbg(chan, "out of memory for link descriptor\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); desc->async_tx.tx_submit = fsl_dma_tx_submit; diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index eef145edb936..59d1e7c6fd0f 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -77,8 +77,8 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) hsu_chan_writel(hsuc, HSU_CH_MTSR, mtsr); /* Set descriptors */ - count = (desc->nents - desc->active) % HSU_DMA_CHAN_NR_DESC; - for (i = 0; i < count; i++) { + count = desc->nents - desc->active; + for (i = 0; i < count && i < HSU_DMA_CHAN_NR_DESC; i++) { hsu_chan_writel(hsuc, HSU_CH_DxSAR(i), desc->sg[i].addr); hsu_chan_writel(hsuc, HSU_CH_DxTSR(i), desc->sg[i].len); @@ -160,7 +160,7 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) return IRQ_NONE; /* Timeout IRQ, need wait some time, see Errata 2 */ - if (hsuc->direction == DMA_DEV_TO_MEM && (sr & HSU_CH_SR_DESCTO_ANY)) + if (sr & HSU_CH_SR_DESCTO_ANY) udelay(2); sr &= ~HSU_CH_SR_DESCTO_ANY; @@ -417,6 +417,8 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) hsu->dma.dev = chip->dev; + dma_set_max_seg_size(hsu->dma.dev, HSU_CH_DxTSR_MASK); + ret = dma_async_device_register(&hsu->dma); if (ret) return ret; diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 578a8ee8cd05..50a9d1bda253 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -55,6 +55,10 @@ #define HSU_CH_DCR_CHEI BIT(23) #define HSU_CH_DCR_CHTOI(x) BIT(24 + (x)) +/* Bits in HSU_CH_DxTSR */ +#define HSU_CH_DxTSR_MASK GENMASK(15, 0) +#define HSU_CH_DxTSR_TSR(x) ((x) & HSU_CH_DxTSR_MASK) + struct hsu_dma_sg { dma_addr_t addr; unsigned int len; diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index efdee1a69fc4..d406056e8892 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -690,12 +690,11 @@ static int ioat_alloc_chan_resources(struct dma_chan *c) /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ ioat_chan->completion = - dma_pool_alloc(ioat_chan->ioat_dma->completion_pool, - GFP_KERNEL, &ioat_chan->completion_dma); + dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool, + GFP_KERNEL, &ioat_chan->completion_dma); if (!ioat_chan->completion) return -ENOMEM; - memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion)); writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); writel(((u64)ioat_chan->completion_dma) >> 32, @@ -1074,6 +1073,7 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) struct ioatdma_chan *ioat_chan; bool is_raid_device = false; int err; + u16 val16; dma = &ioat_dma->dma_dev; dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; @@ -1173,6 +1173,17 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) if (dca) ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + /* disable relaxed ordering */ + err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16); + if (err) + return err; + + /* clear relaxed ordering enable */ + val16 &= ~IOAT_DEVCTRL_ROE; + err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16); + if (err) + return err; + return 0; } diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 4994a3623aee..70534981a49b 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -26,6 +26,13 @@ #define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 +/* PCIe config registers */ + +/* EXPCAPID + N */ +#define IOAT_DEVCTRL_OFFSET 0x8 +/* relaxed ordering enable */ +#define IOAT_DEVCTRL_ROE 0x10 + /* MMIO Device Registers */ #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index e39457f13d4d..56f1fd68b620 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -364,13 +364,12 @@ mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan) struct mmp_pdma_desc_sw *desc; dma_addr_t pdesc; - desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); if (!desc) { dev_err(chan->dev, "out of memory for link descriptor\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan); /* each desc has submit */ diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index aae76fb39adc..ccadafa51d5e 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -3,6 +3,7 @@ * Copyright (C) Semihalf 2009 * Copyright (C) Ilya Yanok, Emcraft Systems 2010 * Copyright (C) Alexander Popov, Promcontroller 2014 + * Copyright (C) Mario Six, Guntermann & Drunck GmbH, 2016 * * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description * (defines, structures and comments) was taken from MPC5121 DMA driver @@ -26,18 +27,19 @@ */ /* - * MPC512x and MPC8308 DMA driver. It supports - * memory to memory data transfers (tested using dmatest module) and - * data transfers between memory and peripheral I/O memory - * by means of slave scatter/gather with these limitations: - * - chunked transfers (described by s/g lists with more than one item) - * are refused as long as proper support for scatter/gather is missing; - * - transfers on MPC8308 always start from software as this SoC appears - * not to have external request lines for peripheral flow control; - * - only peripheral devices with 4-byte FIFO access register are supported; - * - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently - * source and destination addresses must be 4-byte aligned - * and transfer size must be aligned on (4 * maxburst) boundary; + * MPC512x and MPC8308 DMA driver. It supports memory to memory data transfers + * (tested using dmatest module) and data transfers between memory and + * peripheral I/O memory by means of slave scatter/gather with these + * limitations: + * - chunked transfers (described by s/g lists with more than one item) are + * refused as long as proper support for scatter/gather is missing + * - transfers on MPC8308 always start from software as this SoC does not have + * external request lines for peripheral flow control + * - memory <-> I/O memory transfer chunks of sizes of 1, 2, 4, 16 (for + * MPC512x), and 32 bytes are supported, and, consequently, source + * addresses and destination addresses must be aligned accordingly; + * furthermore, for MPC512x SoCs, the transfer size must be aligned on + * (chunk size * maxburst) */ #include <linux/module.h> @@ -213,8 +215,10 @@ struct mpc_dma_chan { /* Settings for access to peripheral FIFO */ dma_addr_t src_per_paddr; u32 src_tcd_nunits; + u8 swidth; dma_addr_t dst_per_paddr; u32 dst_tcd_nunits; + u8 dwidth; /* Lock for this structure */ spinlock_t lock; @@ -247,6 +251,7 @@ static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c) static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) { struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c); + return container_of(mchan, struct mpc_dma, channels[c->chan_id]); } @@ -254,9 +259,9 @@ static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) * Execute all queued DMA descriptors. * * Following requirements must be met while calling mpc_dma_execute(): - * a) mchan->lock is acquired, - * b) mchan->active list is empty, - * c) mchan->queued list contains at least one entry. + * a) mchan->lock is acquired, + * b) mchan->active list is empty, + * c) mchan->queued list contains at least one entry. */ static void mpc_dma_execute(struct mpc_dma_chan *mchan) { @@ -446,20 +451,15 @@ static void mpc_dma_tasklet(unsigned long data) if (es & MPC_DMA_DMAES_SAE) dev_err(mdma->dma.dev, "- Source Address Error\n"); if (es & MPC_DMA_DMAES_SOE) - dev_err(mdma->dma.dev, "- Source Offset" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Source Offset Configuration Error\n"); if (es & MPC_DMA_DMAES_DAE) - dev_err(mdma->dma.dev, "- Destination Address" - " Error\n"); + dev_err(mdma->dma.dev, "- Destination Address Error\n"); if (es & MPC_DMA_DMAES_DOE) - dev_err(mdma->dma.dev, "- Destination Offset" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Destination Offset Configuration Error\n"); if (es & MPC_DMA_DMAES_NCE) - dev_err(mdma->dma.dev, "- NBytes/Citter" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- NBytes/Citter Configuration Error\n"); if (es & MPC_DMA_DMAES_SGE) - dev_err(mdma->dma.dev, "- Scatter/Gather" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Scatter/Gather Configuration Error\n"); if (es & MPC_DMA_DMAES_SBE) dev_err(mdma->dma.dev, "- Source Bus Error\n"); if (es & MPC_DMA_DMAES_DBE) @@ -518,8 +518,8 @@ static int mpc_dma_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) { mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL); if (!mdesc) { - dev_notice(mdma->dma.dev, "Memory allocation error. " - "Allocated only %u descriptors\n", i); + dev_notice(mdma->dma.dev, + "Memory allocation error. Allocated only %u descriptors\n", i); break; } @@ -684,6 +684,15 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, return &mdesc->desc; } +inline u8 buswidth_to_dmatsize(u8 buswidth) +{ + u8 res; + + for (res = 0; buswidth > 1; buswidth /= 2) + res++; + return res; +} + static struct dma_async_tx_descriptor * mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, @@ -742,39 +751,54 @@ mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, memset(tcd, 0, sizeof(struct mpc_dma_tcd)); - if (!IS_ALIGNED(sg_dma_address(sg), 4)) - goto err_prep; - if (direction == DMA_DEV_TO_MEM) { tcd->saddr = per_paddr; tcd->daddr = sg_dma_address(sg); + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->dwidth)) + goto err_prep; + tcd->soff = 0; - tcd->doff = 4; + tcd->doff = mchan->dwidth; } else { tcd->saddr = sg_dma_address(sg); tcd->daddr = per_paddr; - tcd->soff = 4; + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->swidth)) + goto err_prep; + + tcd->soff = mchan->swidth; tcd->doff = 0; } - tcd->ssize = MPC_DMA_TSIZE_4; - tcd->dsize = MPC_DMA_TSIZE_4; + tcd->ssize = buswidth_to_dmatsize(mchan->swidth); + tcd->dsize = buswidth_to_dmatsize(mchan->dwidth); - len = sg_dma_len(sg); - tcd->nbytes = tcd_nunits * 4; - if (!IS_ALIGNED(len, tcd->nbytes)) - goto err_prep; + if (mdma->is_mpc8308) { + tcd->nbytes = sg_dma_len(sg); + if (!IS_ALIGNED(tcd->nbytes, mchan->swidth)) + goto err_prep; - iter = len / tcd->nbytes; - if (iter >= 1 << 15) { - /* len is too big */ - goto err_prep; + /* No major loops for MPC8303 */ + tcd->biter = 1; + tcd->citer = 1; + } else { + len = sg_dma_len(sg); + tcd->nbytes = tcd_nunits * tcd->ssize; + if (!IS_ALIGNED(len, tcd->nbytes)) + goto err_prep; + + iter = len / tcd->nbytes; + if (iter >= 1 << 15) { + /* len is too big */ + goto err_prep; + } + /* citer_linkch contains the high bits of iter */ + tcd->biter = iter & 0x1ff; + tcd->biter_linkch = iter >> 9; + tcd->citer = tcd->biter; + tcd->citer_linkch = tcd->biter_linkch; } - /* citer_linkch contains the high bits of iter */ - tcd->biter = iter & 0x1ff; - tcd->biter_linkch = iter >> 9; - tcd->citer = tcd->biter; - tcd->citer_linkch = tcd->biter_linkch; tcd->e_sg = 0; tcd->d_req = 1; @@ -796,40 +820,62 @@ err_prep: return NULL; } +inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308) +{ + switch (buswidth) { + case 16: + if (is_mpc8308) + return false; + case 1: + case 2: + case 4: + case 32: + break; + default: + return false; + } + + return true; +} + static int mpc_dma_device_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan); unsigned long flags; /* * Software constraints: - * - only transfers between a peripheral device and - * memory are supported; - * - only peripheral devices with 4-byte FIFO access register - * are supported; - * - minimal transfer chunk is 4 bytes and consequently - * source and destination addresses must be 4-byte aligned - * and transfer size must be aligned on (4 * maxburst) - * boundary; - * - during the transfer RAM address is being incremented by - * the size of minimal transfer chunk; - * - peripheral port's address is constant during the transfer. + * - only transfers between a peripheral device and memory are + * supported + * - transfer chunk sizes of 1, 2, 4, 16 (for MPC512x), and 32 bytes + * are supported, and, consequently, source addresses and + * destination addresses; must be aligned accordingly; furthermore, + * for MPC512x SoCs, the transfer size must be aligned on (chunk + * size * maxburst) + * - during the transfer, the RAM address is incremented by the size + * of transfer chunk + * - the peripheral port's address is constant during the transfer. */ - if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES || - cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES || - !IS_ALIGNED(cfg->src_addr, 4) || - !IS_ALIGNED(cfg->dst_addr, 4)) { + if (!IS_ALIGNED(cfg->src_addr, cfg->src_addr_width) || + !IS_ALIGNED(cfg->dst_addr, cfg->dst_addr_width)) { return -EINVAL; } + if (!is_buswidth_valid(cfg->src_addr_width, mdma->is_mpc8308) || + !is_buswidth_valid(cfg->dst_addr_width, mdma->is_mpc8308)) + return -EINVAL; + spin_lock_irqsave(&mchan->lock, flags); mchan->src_per_paddr = cfg->src_addr; mchan->src_tcd_nunits = cfg->src_maxburst; + mchan->swidth = cfg->src_addr_width; mchan->dst_per_paddr = cfg->dst_addr; mchan->dst_tcd_nunits = cfg->dst_maxburst; + mchan->dwidth = cfg->dst_addr_width; /* Apply defaults */ if (mchan->src_tcd_nunits == 0) @@ -875,7 +921,6 @@ static int mpc_dma_probe(struct platform_device *op) mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL); if (!mdma) { - dev_err(dev, "Memory exhausted!\n"); retval = -ENOMEM; goto err; } @@ -999,7 +1044,8 @@ static int mpc_dma_probe(struct platform_device *op) out_be32(&mdma->regs->dmaerrl, 0xFFFF); } else { out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG | - MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA); + MPC_DMA_DMACR_ERGA | + MPC_DMA_DMACR_ERCA); /* Disable hardware DMA requests */ out_be32(&mdma->regs->dmaerqh, 0); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 3922a5d56806..25d1dadcddd1 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -31,6 +31,12 @@ #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_type { + XOR_ORION, + XOR_ARMADA_38X, + XOR_ARMADA_37XX, +}; + enum mv_xor_mode { XOR_MODE_IN_REG, XOR_MODE_IN_DESC, @@ -477,7 +483,7 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); dev_dbg(mv_chan_to_devp(mv_chan), - "%s src_cnt: %d len: %u dest %pad flags: %ld\n", + "%s src_cnt: %d len: %zu dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); sw_desc = mv_chan_alloc_slot(mv_chan); @@ -933,7 +939,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) + int idx, dma_cap_mask_t cap_mask, int irq) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -945,7 +951,10 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; - mv_chan->op_in_desc = op_in_desc; + if (xordev->xor_type == XOR_ORION) + mv_chan->op_in_desc = XOR_MODE_IN_REG; + else + mv_chan->op_in_desc = XOR_MODE_IN_DESC; dma_dev = &mv_chan->dmadev; @@ -1085,6 +1094,33 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static void +mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev) +{ + void __iomem *base = xordev->xor_high_base; + u32 win_enable = 0; + int i; + + for (i = 0; i < 8; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + /* + * For Armada3700 open default 4GB Mbus window. The dram + * related configuration are done at AXIS level. + */ + writel(0xffff0000, base + WINDOW_SIZE(0)); + win_enable |= 1; + win_enable |= 3 << 16; + + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); + writel(0, base + WINDOW_OVERRIDE_CTRL(0)); + writel(0, base + WINDOW_OVERRIDE_CTRL(1)); +} + /* * Since this XOR driver is basically used only for RAID5, we don't * need to care about synchronizing ->suspend with DMA activity, @@ -1129,6 +1165,11 @@ static int mv_xor_resume(struct platform_device *dev) XOR_INTR_MASK(mv_chan)); } + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + return 0; + } + dram = mv_mbus_dram_info(); if (dram) mv_xor_conf_mbus_windows(xordev, dram); @@ -1137,8 +1178,9 @@ static int mv_xor_resume(struct platform_device *dev) } static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, - { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + { .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X }, + { .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX }, {}, }; @@ -1152,7 +1194,6 @@ static int mv_xor_probe(struct platform_device *pdev) struct resource *res; unsigned int max_engines, max_channels; int i, ret; - int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1180,12 +1221,30 @@ static int mv_xor_probe(struct platform_device *pdev) platform_set_drvdata(pdev, xordev); + + /* + * We need to know which type of XOR device we use before + * setting up. In non-dt case it can only be the legacy one. + */ + xordev->xor_type = XOR_ORION; + if (pdev->dev.of_node) { + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); + + xordev->xor_type = (uintptr_t)of_id->data; + } + /* * (Re-)program MBUS remapping windows if we are asked to. */ - dram = mv_mbus_dram_info(); - if (dram) - mv_xor_conf_mbus_windows(xordev, dram); + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + } else { + dram = mv_mbus_dram_info(); + if (dram) + mv_xor_conf_mbus_windows(xordev, dram); + } /* Not all platforms can gate the clock, so it is not * an error if the clock does not exists. @@ -1199,12 +1258,16 @@ static int mv_xor_probe(struct platform_device *pdev) * order for async_tx to perform well. So we limit the number * of engines and channels so that we take into account this * constraint. Note that we also want to use channels from - * separate engines when possible. + * separate engines when possible. For dual-CPU Armada 3700 + * SoC with single XOR engine allow using its both channels. */ max_engines = num_present_cpus(); - max_channels = min_t(unsigned int, - MV_XOR_MAX_CHANNELS, - DIV_ROUND_UP(num_present_cpus(), 2)); + if (xordev->xor_type == XOR_ARMADA_37XX) + max_channels = num_present_cpus(); + else + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); if (mv_xor_engine_count >= max_engines) return 0; @@ -1212,15 +1275,11 @@ static int mv_xor_probe(struct platform_device *pdev) if (pdev->dev.of_node) { struct device_node *np; int i = 0; - const struct of_device_id *of_id = - of_match_device(mv_xor_dt_ids, - &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; - op_in_desc = (int)of_id->data; if (i >= max_channels) continue; @@ -1237,7 +1296,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq, op_in_desc); + cap_mask, irq); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1266,8 +1325,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq, - XOR_MODE_IN_REG); + cd->cap_mask, irq); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index c19fe30e5ae9..bf56e082e7cd 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -85,6 +85,7 @@ struct mv_xor_device { void __iomem *xor_high_base; struct clk *clk; struct mv_xor_chan *channels[MV_XOR_MAX_CHANNELS]; + int xor_type; }; /** diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index 1e1f2986eba8..faae0bfe1109 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -240,8 +240,9 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, struct of_phandle_args dma_spec; struct of_dma *ofdma; struct dma_chan *chan; - int count, i; + int count, i, start; int ret_no_channel = -ENODEV; + static atomic_t last_index; if (!np || !name) { pr_err("%s: not enough information provided\n", __func__); @@ -259,8 +260,15 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, return ERR_PTR(-ENODEV); } + /* + * approximate an average distribution across multiple + * entries with the same name + */ + start = atomic_inc_return(&last_index); for (i = 0; i < count; i++) { - if (of_dma_match_channel(np, name, i, &dma_spec)) + if (of_dma_match_channel(np, name, + (i + start) % count, + &dma_spec)) continue; mutex_lock(&of_dma_lock); diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 77c1c44009d8..e756a30ccba2 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -117,6 +117,7 @@ struct pxad_chan { /* protected by vc->lock */ struct pxad_phy *phy; struct dma_pool *desc_pool; /* Descriptors pool */ + dma_cookie_t bus_error; }; struct pxad_device { @@ -563,6 +564,7 @@ static void pxad_launch_chan(struct pxad_chan *chan, return; } } + chan->bus_error = 0; /* * Program the descriptor's address into the DMA controller, @@ -666,6 +668,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) struct virt_dma_desc *vd, *tmp; unsigned int dcsr; unsigned long flags; + dma_cookie_t last_started = 0; BUG_ON(!chan); @@ -678,6 +681,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) dev_dbg(&chan->vc.chan.dev->device, "%s(): checking txd %p[%x]: completed=%d\n", __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + last_started = vd->tx.cookie; if (to_pxad_sw_desc(vd)->cyclic) { vchan_cyclic_callback(vd); break; @@ -690,7 +694,12 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) } } - if (dcsr & PXA_DCSR_STOPSTATE) { + if (dcsr & PXA_DCSR_BUSERR) { + chan->bus_error = last_started; + phy_disable(phy); + } + + if (!chan->bus_error && dcsr & PXA_DCSR_STOPSTATE) { dev_dbg(&chan->vc.chan.dev->device, "%s(): channel stopped, submitted_empty=%d issued_empty=%d", __func__, @@ -1249,6 +1258,9 @@ static enum dma_status pxad_tx_status(struct dma_chan *dchan, struct pxad_chan *chan = to_pxad_chan(dchan); enum dma_status ret; + if (cookie == chan->bus_error) + return DMA_ERROR; + ret = dma_cookie_status(dchan, cookie, txstate); if (likely(txstate && (ret != DMA_ERROR))) dma_set_residue(txstate, pxad_residue(chan, cookie)); @@ -1321,7 +1333,7 @@ static int pxad_init_phys(struct platform_device *op, return 0; } -static const struct of_device_id const pxad_dt_ids[] = { +static const struct of_device_id pxad_dt_ids[] = { { .compatible = "marvell,pdma-1.0", }, {} }; diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile index bfea6990229f..4bfc38b45220 100644 --- a/drivers/dma/qcom/Makefile +++ b/drivers/dma/qcom/Makefile @@ -1,3 +1,5 @@ obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o hdma_mgmt-objs := hidma_mgmt.o hidma_mgmt_sys.o +obj-$(CONFIG_QCOM_HIDMA) += hdma.o +hdma-objs := hidma_ll.o hidma.o hidma_dbg.o diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d5e0a9c3ad5d..969b48176745 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -342,7 +342,7 @@ static const struct reg_offset_data bam_v1_7_reg_info[] = { #define BAM_DESC_FIFO_SIZE SZ_32K #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1) -#define BAM_MAX_DATA_SIZE (SZ_32K - 8) +#define BAM_FIFO_SIZE (SZ_32K - 8) struct bam_chan { struct virt_dma_chan vc; @@ -387,6 +387,7 @@ struct bam_device { /* execution environment ID, from DT */ u32 ee; + bool controlled_remotely; const struct reg_offset_data *layout; @@ -458,7 +459,7 @@ static void bam_chan_init_hw(struct bam_chan *bchan, */ writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)), bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR)); - writel_relaxed(BAM_DESC_FIFO_SIZE, + writel_relaxed(BAM_FIFO_SIZE, bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES)); /* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */ @@ -604,7 +605,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, /* calculate number of required entries */ for_each_sg(sgl, sg, sg_len, i) - num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_MAX_DATA_SIZE); + num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE); /* allocate enough room to accomodate the number of entries */ async_desc = kzalloc(sizeof(*async_desc) + @@ -635,10 +636,10 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, desc->addr = cpu_to_le32(sg_dma_address(sg) + curr_offset); - if (remainder > BAM_MAX_DATA_SIZE) { - desc->size = cpu_to_le16(BAM_MAX_DATA_SIZE); - remainder -= BAM_MAX_DATA_SIZE; - curr_offset += BAM_MAX_DATA_SIZE; + if (remainder > BAM_FIFO_SIZE) { + desc->size = cpu_to_le16(BAM_FIFO_SIZE); + remainder -= BAM_FIFO_SIZE; + curr_offset += BAM_FIFO_SIZE; } else { desc->size = cpu_to_le16(remainder); remainder = 0; @@ -801,13 +802,17 @@ static irqreturn_t bam_dma_irq(int irq, void *data) if (srcs & P_IRQ) tasklet_schedule(&bdev->task); - if (srcs & BAM_IRQ) + if (srcs & BAM_IRQ) { clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS)); - /* don't allow reorder of the various accesses to the BAM registers */ - mb(); + /* + * don't allow reorder of the various accesses to the BAM + * registers + */ + mb(); - writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR)); + writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR)); + } return IRQ_HANDLED; } @@ -1038,6 +1043,9 @@ static int bam_init(struct bam_device *bdev) val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES)); bdev->num_channels = val & BAM_NUM_PIPES_MASK; + if (bdev->controlled_remotely) + return 0; + /* s/w reset bam */ /* after reset all pipes are disabled and idle */ val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL)); @@ -1125,6 +1133,9 @@ static int bam_dma_probe(struct platform_device *pdev) return ret; } + bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node, + "qcom,controlled-remotely"); + bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk"); if (IS_ERR(bdev->bamclk)) return PTR_ERR(bdev->bamclk); @@ -1163,7 +1174,7 @@ static int bam_dma_probe(struct platform_device *pdev) /* set max dma segment size */ bdev->common.dev = bdev->dev; bdev->common.dev->dma_parms = &bdev->dma_parms; - ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE); + ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE); if (ret) { dev_err(bdev->dev, "cannot set maximum segment size\n"); goto err_bam_channel_exit; @@ -1234,6 +1245,9 @@ static int bam_dma_remove(struct platform_device *pdev) bam_dma_terminate_all(&bdev->channels[i].vc.chan); tasklet_kill(&bdev->channels[i].vc.task); + if (!bdev->channels[i].fifo_virt) + continue; + dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bdev->channels[i].fifo_virt, bdev->channels[i].fifo_phys); diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c index cccc78efbca9..41b5c6dee713 100644 --- a/drivers/dma/qcom/hidma.c +++ b/drivers/dma/qcom/hidma.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine interface * - * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -404,7 +404,7 @@ static int hidma_terminate_channel(struct dma_chan *chan) spin_unlock_irqrestore(&mchan->lock, irqflags); /* this suspends the existing transfer */ - rc = hidma_ll_pause(dmadev->lldev); + rc = hidma_ll_disable(dmadev->lldev); if (rc) { dev_err(dmadev->ddev.dev, "channel did not pause\n"); goto out; @@ -427,7 +427,7 @@ static int hidma_terminate_channel(struct dma_chan *chan) list_move(&mdesc->node, &mchan->free); } - rc = hidma_ll_resume(dmadev->lldev); + rc = hidma_ll_enable(dmadev->lldev); out: pm_runtime_mark_last_busy(dmadev->ddev.dev); pm_runtime_put_autosuspend(dmadev->ddev.dev); @@ -488,7 +488,7 @@ static int hidma_pause(struct dma_chan *chan) dmadev = to_hidma_dev(mchan->chan.device); if (!mchan->paused) { pm_runtime_get_sync(dmadev->ddev.dev); - if (hidma_ll_pause(dmadev->lldev)) + if (hidma_ll_disable(dmadev->lldev)) dev_warn(dmadev->ddev.dev, "channel did not stop\n"); mchan->paused = true; pm_runtime_mark_last_busy(dmadev->ddev.dev); @@ -507,7 +507,7 @@ static int hidma_resume(struct dma_chan *chan) dmadev = to_hidma_dev(mchan->chan.device); if (mchan->paused) { pm_runtime_get_sync(dmadev->ddev.dev); - rc = hidma_ll_resume(dmadev->lldev); + rc = hidma_ll_enable(dmadev->lldev); if (!rc) mchan->paused = false; else @@ -530,6 +530,43 @@ static irqreturn_t hidma_chirq_handler(int chirq, void *arg) return hidma_ll_inthandler(chirq, lldev); } +static ssize_t hidma_show_values(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct hidma_dev *mdev = platform_get_drvdata(pdev); + + buf[0] = 0; + + if (strcmp(attr->attr.name, "chid") == 0) + sprintf(buf, "%d\n", mdev->chidx); + + return strlen(buf); +} + +static int hidma_create_sysfs_entry(struct hidma_dev *dev, char *name, + int mode) +{ + struct device_attribute *attrs; + char *name_copy; + + attrs = devm_kmalloc(dev->ddev.dev, sizeof(struct device_attribute), + GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + name_copy = devm_kstrdup(dev->ddev.dev, name, GFP_KERNEL); + if (!name_copy) + return -ENOMEM; + + attrs->attr.name = name_copy; + attrs->attr.mode = mode; + attrs->show = hidma_show_values; + sysfs_attr_init(&attrs->attr); + + return device_create_file(dev->ddev.dev, attrs); +} + static int hidma_probe(struct platform_device *pdev) { struct hidma_dev *dmadev; @@ -644,6 +681,8 @@ static int hidma_probe(struct platform_device *pdev) dmadev->irq = chirq; tasklet_init(&dmadev->task, hidma_issue_task, (unsigned long)dmadev); + hidma_debug_init(dmadev); + hidma_create_sysfs_entry(dmadev, "chid", S_IRUGO); dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n"); platform_set_drvdata(pdev, dmadev); pm_runtime_mark_last_busy(dmadev->ddev.dev); @@ -651,6 +690,7 @@ static int hidma_probe(struct platform_device *pdev) return 0; uninit: + hidma_debug_uninit(dmadev); hidma_ll_uninit(dmadev->lldev); dmafree: if (dmadev) @@ -668,6 +708,7 @@ static int hidma_remove(struct platform_device *pdev) pm_runtime_get_sync(dmadev->ddev.dev); dma_async_device_unregister(&dmadev->ddev); devm_free_irq(dmadev->ddev.dev, dmadev->irq, dmadev->lldev); + hidma_debug_uninit(dmadev); hidma_ll_uninit(dmadev->lldev); hidma_free(dmadev); @@ -689,7 +730,6 @@ static const struct of_device_id hidma_match[] = { {.compatible = "qcom,hidma-1.0",}, {}, }; - MODULE_DEVICE_TABLE(of, hidma_match); static struct platform_driver hidma_driver = { diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h index 231e306f6d87..db413a5efc4e 100644 --- a/drivers/dma/qcom/hidma.h +++ b/drivers/dma/qcom/hidma.h @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA data structures * - * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -20,32 +20,29 @@ #include <linux/interrupt.h> #include <linux/dmaengine.h> -#define TRE_SIZE 32 /* each TRE is 32 bytes */ -#define TRE_CFG_IDX 0 -#define TRE_LEN_IDX 1 -#define TRE_SRC_LOW_IDX 2 -#define TRE_SRC_HI_IDX 3 -#define TRE_DEST_LOW_IDX 4 -#define TRE_DEST_HI_IDX 5 - -struct hidma_tx_status { - u8 err_info; /* error record in this transfer */ - u8 err_code; /* completion code */ -}; +#define HIDMA_TRE_SIZE 32 /* each TRE is 32 bytes */ +#define HIDMA_TRE_CFG_IDX 0 +#define HIDMA_TRE_LEN_IDX 1 +#define HIDMA_TRE_SRC_LOW_IDX 2 +#define HIDMA_TRE_SRC_HI_IDX 3 +#define HIDMA_TRE_DEST_LOW_IDX 4 +#define HIDMA_TRE_DEST_HI_IDX 5 struct hidma_tre { atomic_t allocated; /* if this channel is allocated */ bool queued; /* flag whether this is pending */ u16 status; /* status */ - u32 chidx; /* index of the tre */ + u32 idx; /* index of the tre */ u32 dma_sig; /* signature of the tre */ const char *dev_name; /* name of the device */ void (*callback)(void *data); /* requester callback */ void *data; /* Data associated with this channel*/ struct hidma_lldev *lldev; /* lldma device pointer */ - u32 tre_local[TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */ + u32 tre_local[HIDMA_TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */ u32 tre_index; /* the offset where this was written*/ u32 int_flags; /* interrupt flags */ + u8 err_info; /* error record in this transfer */ + u8 err_code; /* completion code */ }; struct hidma_lldev { @@ -61,22 +58,21 @@ struct hidma_lldev { void __iomem *evca; /* Event Channel address */ struct hidma_tre **pending_tre_list; /* Pointers to pending TREs */ - struct hidma_tx_status - *tx_status_list; /* Pointers to pending TREs status*/ s32 pending_tre_count; /* Number of TREs pending */ void *tre_ring; /* TRE ring */ - dma_addr_t tre_ring_handle; /* TRE ring to be shared with HW */ + dma_addr_t tre_dma; /* TRE ring to be shared with HW */ u32 tre_ring_size; /* Byte size of the ring */ u32 tre_processed_off; /* last processed TRE */ void *evre_ring; /* EVRE ring */ - dma_addr_t evre_ring_handle; /* EVRE ring to be shared with HW */ + dma_addr_t evre_dma; /* EVRE ring to be shared with HW */ u32 evre_ring_size; /* Byte size of the ring */ u32 evre_processed_off; /* last processed EVRE */ u32 tre_write_offset; /* TRE write location */ struct tasklet_struct task; /* task delivering notifications */ + struct tasklet_struct rst_task; /* task to reset HW */ DECLARE_KFIFO_PTR(handoff_fifo, struct hidma_tre *); /* pending TREs FIFO */ }; @@ -145,8 +141,8 @@ enum dma_status hidma_ll_status(struct hidma_lldev *llhndl, u32 tre_ch); bool hidma_ll_isenabled(struct hidma_lldev *llhndl); void hidma_ll_queue_request(struct hidma_lldev *llhndl, u32 tre_ch); void hidma_ll_start(struct hidma_lldev *llhndl); -int hidma_ll_pause(struct hidma_lldev *llhndl); -int hidma_ll_resume(struct hidma_lldev *llhndl); +int hidma_ll_disable(struct hidma_lldev *lldev); +int hidma_ll_enable(struct hidma_lldev *llhndl); void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch, dma_addr_t src, dma_addr_t dest, u32 len, u32 flags); int hidma_ll_setup(struct hidma_lldev *lldev); @@ -157,4 +153,6 @@ int hidma_ll_uninit(struct hidma_lldev *llhndl); irqreturn_t hidma_ll_inthandler(int irq, void *arg); void hidma_cleanup_pending_tre(struct hidma_lldev *llhndl, u8 err_info, u8 err_code); +int hidma_debug_init(struct hidma_dev *dmadev); +void hidma_debug_uninit(struct hidma_dev *dmadev); #endif diff --git a/drivers/dma/qcom/hidma_dbg.c b/drivers/dma/qcom/hidma_dbg.c new file mode 100644 index 000000000000..fa827e5ffd68 --- /dev/null +++ b/drivers/dma/qcom/hidma_dbg.c @@ -0,0 +1,217 @@ +/* + * Qualcomm Technologies HIDMA debug file + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/pm_runtime.h> + +#include "hidma.h" + +static void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch) +{ + struct hidma_lldev *lldev = llhndl; + struct hidma_tre *tre; + u32 length; + dma_addr_t src_start; + dma_addr_t dest_start; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in chstats:%d", tre_ch); + return; + } + tre = &lldev->trepool[tre_ch]; + seq_printf(s, "------Channel %d -----\n", tre_ch); + seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated)); + seq_printf(s, "queued = 0x%x\n", tre->queued); + seq_printf(s, "err_info = 0x%x\n", tre->err_info); + seq_printf(s, "err_code = 0x%x\n", tre->err_code); + seq_printf(s, "status = 0x%x\n", tre->status); + seq_printf(s, "idx = 0x%x\n", tre->idx); + seq_printf(s, "dma_sig = 0x%x\n", tre->dma_sig); + seq_printf(s, "dev_name=%s\n", tre->dev_name); + seq_printf(s, "callback=%p\n", tre->callback); + seq_printf(s, "data=%p\n", tre->data); + seq_printf(s, "tre_index = 0x%x\n", tre->tre_index); + + tre_local = &tre->tre_local[0]; + src_start = tre_local[HIDMA_TRE_SRC_LOW_IDX]; + src_start = ((u64) (tre_local[HIDMA_TRE_SRC_HI_IDX]) << 32) + src_start; + dest_start = tre_local[HIDMA_TRE_DEST_LOW_IDX]; + dest_start += ((u64) (tre_local[HIDMA_TRE_DEST_HI_IDX]) << 32); + length = tre_local[HIDMA_TRE_LEN_IDX]; + + seq_printf(s, "src=%pap\n", &src_start); + seq_printf(s, "dest=%pap\n", &dest_start); + seq_printf(s, "length = 0x%x\n", length); +} + +static void hidma_ll_devstats(struct seq_file *s, void *llhndl) +{ + struct hidma_lldev *lldev = llhndl; + + seq_puts(s, "------Device -----\n"); + seq_printf(s, "lldev init = 0x%x\n", lldev->initialized); + seq_printf(s, "trch_state = 0x%x\n", lldev->trch_state); + seq_printf(s, "evch_state = 0x%x\n", lldev->evch_state); + seq_printf(s, "chidx = 0x%x\n", lldev->chidx); + seq_printf(s, "nr_tres = 0x%x\n", lldev->nr_tres); + seq_printf(s, "trca=%p\n", lldev->trca); + seq_printf(s, "tre_ring=%p\n", lldev->tre_ring); + seq_printf(s, "tre_ring_handle=%pap\n", &lldev->tre_dma); + seq_printf(s, "tre_ring_size = 0x%x\n", lldev->tre_ring_size); + seq_printf(s, "tre_processed_off = 0x%x\n", lldev->tre_processed_off); + seq_printf(s, "pending_tre_count=%d\n", lldev->pending_tre_count); + seq_printf(s, "evca=%p\n", lldev->evca); + seq_printf(s, "evre_ring=%p\n", lldev->evre_ring); + seq_printf(s, "evre_ring_handle=%pap\n", &lldev->evre_dma); + seq_printf(s, "evre_ring_size = 0x%x\n", lldev->evre_ring_size); + seq_printf(s, "evre_processed_off = 0x%x\n", lldev->evre_processed_off); + seq_printf(s, "tre_write_offset = 0x%x\n", lldev->tre_write_offset); +} + +/* + * hidma_chan_stats: display HIDMA channel statistics + * + * Display the statistics for the current HIDMA virtual channel device. + */ +static int hidma_chan_stats(struct seq_file *s, void *unused) +{ + struct hidma_chan *mchan = s->private; + struct hidma_desc *mdesc; + struct hidma_dev *dmadev = mchan->dmadev; + + pm_runtime_get_sync(dmadev->ddev.dev); + seq_printf(s, "paused=%u\n", mchan->paused); + seq_printf(s, "dma_sig=%u\n", mchan->dma_sig); + seq_puts(s, "prepared\n"); + list_for_each_entry(mdesc, &mchan->prepared, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "active\n"); + list_for_each_entry(mdesc, &mchan->active, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "completed\n"); + list_for_each_entry(mdesc, &mchan->completed, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + hidma_ll_devstats(s, mchan->dmadev->lldev); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return 0; +} + +/* + * hidma_dma_info: display HIDMA device info + * + * Display the info for the current HIDMA device. + */ +static int hidma_dma_info(struct seq_file *s, void *unused) +{ + struct hidma_dev *dmadev = s->private; + resource_size_t sz; + + seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors); + seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca); + seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->trca_resource->start); + sz = resource_size(dmadev->trca_resource); + seq_printf(s, "dev_trca_size=%pa\n", &sz); + seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca); + seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->evca_resource->start); + sz = resource_size(dmadev->evca_resource); + seq_printf(s, "dev_evca_size=%pa\n", &sz); + return 0; +} + +static int hidma_chan_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, hidma_chan_stats, inode->i_private); +} + +static int hidma_dma_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, hidma_dma_info, inode->i_private); +} + +static const struct file_operations hidma_chan_fops = { + .open = hidma_chan_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations hidma_dma_fops = { + .open = hidma_dma_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void hidma_debug_uninit(struct hidma_dev *dmadev) +{ + debugfs_remove_recursive(dmadev->debugfs); + debugfs_remove_recursive(dmadev->stats); +} + +int hidma_debug_init(struct hidma_dev *dmadev) +{ + int rc = 0; + int chidx = 0; + struct list_head *position = NULL; + + dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL); + if (!dmadev->debugfs) { + rc = -ENODEV; + return rc; + } + + /* walk through the virtual channel list */ + list_for_each(position, &dmadev->ddev.channels) { + struct hidma_chan *chan; + + chan = list_entry(position, struct hidma_chan, + chan.device_node); + sprintf(chan->dbg_name, "chan%d", chidx); + chan->debugfs = debugfs_create_dir(chan->dbg_name, + dmadev->debugfs); + if (!chan->debugfs) { + rc = -ENOMEM; + goto cleanup; + } + chan->stats = debugfs_create_file("stats", S_IRUGO, + chan->debugfs, chan, + &hidma_chan_fops); + if (!chan->stats) { + rc = -ENOMEM; + goto cleanup; + } + chidx++; + } + + dmadev->stats = debugfs_create_file("stats", S_IRUGO, + dmadev->debugfs, dmadev, + &hidma_dma_fops); + if (!dmadev->stats) { + rc = -ENOMEM; + goto cleanup; + } + + return 0; +cleanup: + hidma_debug_uninit(dmadev); + return rc; +} diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c new file mode 100644 index 000000000000..f3929001539b --- /dev/null +++ b/drivers/dma/qcom/hidma_ll.c @@ -0,0 +1,872 @@ +/* + * Qualcomm Technologies HIDMA DMA engine low level code + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/dmaengine.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/dma-mapping.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <linux/iopoll.h> +#include <linux/kfifo.h> +#include <linux/bitops.h> + +#include "hidma.h" + +#define HIDMA_EVRE_SIZE 16 /* each EVRE is 16 bytes */ + +#define HIDMA_TRCA_CTRLSTS_REG 0x000 +#define HIDMA_TRCA_RING_LOW_REG 0x008 +#define HIDMA_TRCA_RING_HIGH_REG 0x00C +#define HIDMA_TRCA_RING_LEN_REG 0x010 +#define HIDMA_TRCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_CTRLSTS_REG 0x000 +#define HIDMA_EVCA_INTCTRL_REG 0x004 +#define HIDMA_EVCA_RING_LOW_REG 0x008 +#define HIDMA_EVCA_RING_HIGH_REG 0x00C +#define HIDMA_EVCA_RING_LEN_REG 0x010 +#define HIDMA_EVCA_WRITE_PTR_REG 0x020 +#define HIDMA_EVCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_IRQ_STAT_REG 0x100 +#define HIDMA_EVCA_IRQ_CLR_REG 0x108 +#define HIDMA_EVCA_IRQ_EN_REG 0x110 + +#define HIDMA_EVRE_CFG_IDX 0 + +#define HIDMA_EVRE_ERRINFO_BIT_POS 24 +#define HIDMA_EVRE_CODE_BIT_POS 28 + +#define HIDMA_EVRE_ERRINFO_MASK GENMASK(3, 0) +#define HIDMA_EVRE_CODE_MASK GENMASK(3, 0) + +#define HIDMA_CH_CONTROL_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_BIT_POS 0x8 + +#define HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS 0 +#define HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS 1 +#define HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9 +#define HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS 10 +#define HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS 11 +#define HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS 14 + +#define ENABLE_IRQS (BIT(HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS)) + +#define HIDMA_INCREMENT_ITERATOR(iter, size, ring_size) \ +do { \ + iter += size; \ + if (iter >= ring_size) \ + iter -= ring_size; \ +} while (0) + +#define HIDMA_CH_STATE(val) \ + ((val >> HIDMA_CH_STATE_BIT_POS) & HIDMA_CH_STATE_MASK) + +#define HIDMA_ERR_INT_MASK \ + (BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS)) + +enum ch_command { + HIDMA_CH_DISABLE = 0, + HIDMA_CH_ENABLE = 1, + HIDMA_CH_SUSPEND = 2, + HIDMA_CH_RESET = 9, +}; + +enum ch_state { + HIDMA_CH_DISABLED = 0, + HIDMA_CH_ENABLED = 1, + HIDMA_CH_RUNNING = 2, + HIDMA_CH_SUSPENDED = 3, + HIDMA_CH_STOPPED = 4, +}; + +enum tre_type { + HIDMA_TRE_MEMCPY = 3, +}; + +enum err_code { + HIDMA_EVRE_STATUS_COMPLETE = 1, + HIDMA_EVRE_STATUS_ERROR = 4, +}; + +static int hidma_is_chan_enabled(int state) +{ + switch (state) { + case HIDMA_CH_ENABLED: + case HIDMA_CH_RUNNING: + return true; + default: + return false; + } +} + +void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch); + return; + } + + atomic_set(&tre->allocated, 0); +} + +int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name, + void (*callback)(void *data), void *data, u32 *tre_ch) +{ + unsigned int i; + struct hidma_tre *tre; + u32 *tre_local; + + if (!tre_ch || !lldev) + return -EINVAL; + + /* need to have at least one empty spot in the queue */ + for (i = 0; i < lldev->nr_tres - 1; i++) { + if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1)) + break; + } + + if (i == (lldev->nr_tres - 1)) + return -ENOMEM; + + tre = &lldev->trepool[i]; + tre->dma_sig = sig; + tre->dev_name = dev_name; + tre->callback = callback; + tre->data = data; + tre->idx = i; + tre->status = 0; + tre->queued = 0; + tre->err_code = 0; + tre->err_info = 0; + tre->lldev = lldev; + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_CFG_IDX] = HIDMA_TRE_MEMCPY; + tre_local[HIDMA_TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8; + tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16); /* set IEOB */ + *tre_ch = i; + if (callback) + callback(data); + return 0; +} + +/* + * Multiple TREs may be queued and waiting in the pending queue. + */ +static void hidma_ll_tre_complete(unsigned long arg) +{ + struct hidma_lldev *lldev = (struct hidma_lldev *)arg; + struct hidma_tre *tre; + + while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) { + /* call the user if it has been read by the hardware */ + if (tre->callback) + tre->callback(tre->data); + } +} + +static int hidma_post_completed(struct hidma_lldev *lldev, int tre_iterator, + u8 err_info, u8 err_code) +{ + struct hidma_tre *tre; + unsigned long flags; + + spin_lock_irqsave(&lldev->lock, flags); + tre = lldev->pending_tre_list[tre_iterator / HIDMA_TRE_SIZE]; + if (!tre) { + spin_unlock_irqrestore(&lldev->lock, flags); + dev_warn(lldev->dev, "tre_index [%d] and tre out of sync\n", + tre_iterator / HIDMA_TRE_SIZE); + return -EINVAL; + } + lldev->pending_tre_list[tre->tre_index] = NULL; + + /* + * Keep track of pending TREs that SW is expecting to receive + * from HW. We got one now. Decrement our counter. + */ + lldev->pending_tre_count--; + if (lldev->pending_tre_count < 0) { + dev_warn(lldev->dev, "tre count mismatch on completion"); + lldev->pending_tre_count = 0; + } + + spin_unlock_irqrestore(&lldev->lock, flags); + + tre->err_info = err_info; + tre->err_code = err_code; + tre->queued = 0; + + kfifo_put(&lldev->handoff_fifo, tre); + tasklet_schedule(&lldev->task); + + return 0; +} + +/* + * Called to handle the interrupt for the channel. + * Return a positive number if TRE or EVRE were consumed on this run. + * Return a positive number if there are pending TREs or EVREs. + * Return 0 if there is nothing to consume or no pending TREs/EVREs found. + */ +static int hidma_handle_tre_completion(struct hidma_lldev *lldev) +{ + u32 evre_ring_size = lldev->evre_ring_size; + u32 tre_ring_size = lldev->tre_ring_size; + u32 err_info, err_code, evre_write_off; + u32 tre_iterator, evre_iterator; + u32 num_completed = 0; + + evre_write_off = readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + tre_iterator = lldev->tre_processed_off; + evre_iterator = lldev->evre_processed_off; + + if ((evre_write_off > evre_ring_size) || + (evre_write_off % HIDMA_EVRE_SIZE)) { + dev_err(lldev->dev, "HW reports invalid EVRE write offset\n"); + return 0; + } + + /* + * By the time control reaches here the number of EVREs and TREs + * may not match. Only consume the ones that hardware told us. + */ + while ((evre_iterator != evre_write_off)) { + u32 *current_evre = lldev->evre_ring + evre_iterator; + u32 cfg; + + cfg = current_evre[HIDMA_EVRE_CFG_IDX]; + err_info = cfg >> HIDMA_EVRE_ERRINFO_BIT_POS; + err_info &= HIDMA_EVRE_ERRINFO_MASK; + err_code = + (cfg >> HIDMA_EVRE_CODE_BIT_POS) & HIDMA_EVRE_CODE_MASK; + + if (hidma_post_completed(lldev, tre_iterator, err_info, + err_code)) + break; + + HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE, + tre_ring_size); + HIDMA_INCREMENT_ITERATOR(evre_iterator, HIDMA_EVRE_SIZE, + evre_ring_size); + + /* + * Read the new event descriptor written by the HW. + * As we are processing the delivered events, other events + * get queued to the SW for processing. + */ + evre_write_off = + readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + num_completed++; + } + + if (num_completed) { + u32 evre_read_off = (lldev->evre_processed_off + + HIDMA_EVRE_SIZE * num_completed); + u32 tre_read_off = (lldev->tre_processed_off + + HIDMA_TRE_SIZE * num_completed); + + evre_read_off = evre_read_off % evre_ring_size; + tre_read_off = tre_read_off % tre_ring_size; + + writel(evre_read_off, lldev->evca + HIDMA_EVCA_DOORBELL_REG); + + /* record the last processed tre offset */ + lldev->tre_processed_off = tre_read_off; + lldev->evre_processed_off = evre_read_off; + } + + return num_completed; +} + +void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info, + u8 err_code) +{ + u32 tre_iterator; + u32 tre_ring_size = lldev->tre_ring_size; + int num_completed = 0; + u32 tre_read_off; + + tre_iterator = lldev->tre_processed_off; + while (lldev->pending_tre_count) { + if (hidma_post_completed(lldev, tre_iterator, err_info, + err_code)) + break; + HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE, + tre_ring_size); + num_completed++; + } + tre_read_off = (lldev->tre_processed_off + + HIDMA_TRE_SIZE * num_completed); + + tre_read_off = tre_read_off % tre_ring_size; + + /* record the last processed tre offset */ + lldev->tre_processed_off = tre_read_off; +} + +static int hidma_ll_reset(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not reset\n"); + return ret; + } + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_DISABLED; + lldev->evch_state = HIDMA_CH_DISABLED; + return 0; +} + +/* + * Abort all transactions and perform a reset. + */ +static void hidma_ll_abort(unsigned long arg) +{ + struct hidma_lldev *lldev = (struct hidma_lldev *)arg; + u8 err_code = HIDMA_EVRE_STATUS_ERROR; + u8 err_info = 0xFF; + int rc; + + hidma_cleanup_pending_tre(lldev, err_info, err_code); + + /* reset the channel for recovery */ + rc = hidma_ll_setup(lldev); + if (rc) { + dev_err(lldev->dev, "channel reinitialize failed after error\n"); + return; + } + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); +} + +/* + * The interrupt handler for HIDMA will try to consume as many pending + * EVRE from the event queue as possible. Each EVRE has an associated + * TRE that holds the user interface parameters. EVRE reports the + * result of the transaction. Hardware guarantees ordering between EVREs + * and TREs. We use last processed offset to figure out which TRE is + * associated with which EVRE. If two TREs are consumed by HW, the EVREs + * are in order in the event ring. + * + * This handler will do a one pass for consuming EVREs. Other EVREs may + * be delivered while we are working. It will try to consume incoming + * EVREs one more time and return. + * + * For unprocessed EVREs, hardware will trigger another interrupt until + * all the interrupt bits are cleared. + * + * Hardware guarantees that by the time interrupt is observed, all data + * transactions in flight are delivered to their respective places and + * are visible to the CPU. + * + * On demand paging for IOMMU is only supported for PCIe via PRI + * (Page Request Interface) not for HIDMA. All other hardware instances + * including HIDMA work on pinned DMA addresses. + * + * HIDMA is not aware of IOMMU presence since it follows the DMA API. All + * IOMMU latency will be built into the data movement time. By the time + * interrupt happens, IOMMU lookups + data movement has already taken place. + * + * While the first read in a typical PCI endpoint ISR flushes all outstanding + * requests traditionally to the destination, this concept does not apply + * here for this HW. + */ +irqreturn_t hidma_ll_inthandler(int chirq, void *arg) +{ + struct hidma_lldev *lldev = arg; + u32 status; + u32 enable; + u32 cause; + + /* + * Fine tuned for this HW... + * + * This ISR has been designed for this particular hardware. Relaxed + * read and write accessors are used for performance reasons due to + * interrupt delivery guarantees. Do not copy this code blindly and + * expect that to work. + */ + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + + while (cause) { + if (cause & HIDMA_ERR_INT_MASK) { + dev_err(lldev->dev, "error 0x%x, resetting...\n", + cause); + + /* Clear out pending interrupts */ + writel(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + tasklet_schedule(&lldev->rst_task); + goto out; + } + + /* + * Try to consume as many EVREs as possible. + */ + hidma_handle_tre_completion(lldev); + + /* We consumed TREs or there are pending TREs or EVREs. */ + writel_relaxed(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* + * Another interrupt might have arrived while we are + * processing this one. Read the new cause. + */ + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + } + +out: + return IRQ_HANDLED; +} + +int hidma_ll_enable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "event channel did not get enabled\n"); + return ret; + } + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not get enabled\n"); + return ret; + } + + lldev->trch_state = HIDMA_CH_ENABLED; + lldev->evch_state = HIDMA_CH_ENABLED; + + return 0; +} + +void hidma_ll_start(struct hidma_lldev *lldev) +{ + unsigned long irqflags; + + spin_lock_irqsave(&lldev->lock, irqflags); + writel(lldev->tre_write_offset, lldev->trca + HIDMA_TRCA_DOORBELL_REG); + spin_unlock_irqrestore(&lldev->lock, irqflags); +} + +bool hidma_ll_isenabled(struct hidma_lldev *lldev) +{ + u32 val; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + lldev->trch_state = HIDMA_CH_STATE(val); + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + lldev->evch_state = HIDMA_CH_STATE(val); + + /* both channels have to be enabled before calling this function */ + if (hidma_is_chan_enabled(lldev->trch_state) && + hidma_is_chan_enabled(lldev->evch_state)) + return true; + + return false; +} + +void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + unsigned long flags; + + tre = &lldev->trepool[tre_ch]; + + /* copy the TRE into its location in the TRE ring */ + spin_lock_irqsave(&lldev->lock, flags); + tre->tre_index = lldev->tre_write_offset / HIDMA_TRE_SIZE; + lldev->pending_tre_list[tre->tre_index] = tre; + memcpy(lldev->tre_ring + lldev->tre_write_offset, + &tre->tre_local[0], HIDMA_TRE_SIZE); + tre->err_code = 0; + tre->err_info = 0; + tre->queued = 1; + lldev->pending_tre_count++; + lldev->tre_write_offset = (lldev->tre_write_offset + HIDMA_TRE_SIZE) + % lldev->tre_ring_size; + spin_unlock_irqrestore(&lldev->lock, flags); +} + +/* + * Note that even though we stop this channel if there is a pending transaction + * in flight it will complete and follow the callback. This request will + * prevent further requests to be made. + */ +int hidma_ll_disable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + lldev->evch_state = HIDMA_CH_STATE(val); + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + lldev->trch_state = HIDMA_CH_STATE(val); + + /* already suspended by this OS */ + if ((lldev->trch_state == HIDMA_CH_SUSPENDED) || + (lldev->evch_state == HIDMA_CH_SUSPENDED)) + return 0; + + /* already stopped by the manager */ + if ((lldev->trch_state == HIDMA_CH_STOPPED) || + (lldev->evch_state == HIDMA_CH_STOPPED)) + return 0; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed + * Delay up to 10ms after reset to allow DMA logic to quiesce. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_SUSPENDED; + lldev->evch_state = HIDMA_CH_SUSPENDED; + return 0; +} + +void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch, + dma_addr_t src, dma_addr_t dest, u32 len, + u32 flags) +{ + struct hidma_tre *tre; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in transfer params:%d", + tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to set params on an unused TRE:%d", + tre_ch); + return; + } + + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_LEN_IDX] = len; + tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src); + tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src); + tre_local[HIDMA_TRE_DEST_LOW_IDX] = lower_32_bits(dest); + tre_local[HIDMA_TRE_DEST_HI_IDX] = upper_32_bits(dest); + tre->int_flags = flags; +} + +/* + * Called during initialization and after an error condition + * to restore hardware state. + */ +int hidma_ll_setup(struct hidma_lldev *lldev) +{ + int rc; + u64 addr; + u32 val; + u32 nr_tres = lldev->nr_tres; + + lldev->pending_tre_count = 0; + lldev->tre_processed_off = 0; + lldev->evre_processed_off = 0; + lldev->tre_write_offset = 0; + + /* disable interrupts */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + /* clear all pending interrupts */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + rc = hidma_ll_reset(lldev); + if (rc) + return rc; + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* disable interrupts again after reset */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + addr = lldev->tre_dma; + writel(lower_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_HIGH_REG); + writel(lldev->tre_ring_size, lldev->trca + HIDMA_TRCA_RING_LEN_REG); + + addr = lldev->evre_dma; + writel(lower_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_HIGH_REG); + writel(HIDMA_EVRE_SIZE * nr_tres, + lldev->evca + HIDMA_EVCA_RING_LEN_REG); + + /* support IRQ only for now */ + val = readl(lldev->evca + HIDMA_EVCA_INTCTRL_REG); + val &= ~0xF; + val |= 0x1; + writel(val, lldev->evca + HIDMA_EVCA_INTCTRL_REG); + + /* clear all pending interrupts and enable them */ + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + return hidma_ll_enable(lldev); +} + +struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres, + void __iomem *trca, void __iomem *evca, + u8 chidx) +{ + u32 required_bytes; + struct hidma_lldev *lldev; + int rc; + size_t sz; + + if (!trca || !evca || !dev || !nr_tres) + return NULL; + + /* need at least four TREs */ + if (nr_tres < 4) + return NULL; + + /* need an extra space */ + nr_tres += 1; + + lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL); + if (!lldev) + return NULL; + + lldev->evca = evca; + lldev->trca = trca; + lldev->dev = dev; + sz = sizeof(struct hidma_tre); + lldev->trepool = devm_kcalloc(lldev->dev, nr_tres, sz, GFP_KERNEL); + if (!lldev->trepool) + return NULL; + + required_bytes = sizeof(lldev->pending_tre_list[0]); + lldev->pending_tre_list = devm_kcalloc(dev, nr_tres, required_bytes, + GFP_KERNEL); + if (!lldev->pending_tre_list) + return NULL; + + sz = (HIDMA_TRE_SIZE + 1) * nr_tres; + lldev->tre_ring = dmam_alloc_coherent(dev, sz, &lldev->tre_dma, + GFP_KERNEL); + if (!lldev->tre_ring) + return NULL; + + memset(lldev->tre_ring, 0, (HIDMA_TRE_SIZE + 1) * nr_tres); + lldev->tre_ring_size = HIDMA_TRE_SIZE * nr_tres; + lldev->nr_tres = nr_tres; + + /* the TRE ring has to be TRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->tre_dma, HIDMA_TRE_SIZE)) { + u8 tre_ring_shift; + + tre_ring_shift = lldev->tre_dma % HIDMA_TRE_SIZE; + tre_ring_shift = HIDMA_TRE_SIZE - tre_ring_shift; + lldev->tre_dma += tre_ring_shift; + lldev->tre_ring += tre_ring_shift; + } + + sz = (HIDMA_EVRE_SIZE + 1) * nr_tres; + lldev->evre_ring = dmam_alloc_coherent(dev, sz, &lldev->evre_dma, + GFP_KERNEL); + if (!lldev->evre_ring) + return NULL; + + memset(lldev->evre_ring, 0, (HIDMA_EVRE_SIZE + 1) * nr_tres); + lldev->evre_ring_size = HIDMA_EVRE_SIZE * nr_tres; + + /* the EVRE ring has to be EVRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->evre_dma, HIDMA_EVRE_SIZE)) { + u8 evre_ring_shift; + + evre_ring_shift = lldev->evre_dma % HIDMA_EVRE_SIZE; + evre_ring_shift = HIDMA_EVRE_SIZE - evre_ring_shift; + lldev->evre_dma += evre_ring_shift; + lldev->evre_ring += evre_ring_shift; + } + lldev->nr_tres = nr_tres; + lldev->chidx = chidx; + + sz = nr_tres * sizeof(struct hidma_tre *); + rc = kfifo_alloc(&lldev->handoff_fifo, sz, GFP_KERNEL); + if (rc) + return NULL; + + rc = hidma_ll_setup(lldev); + if (rc) + return NULL; + + spin_lock_init(&lldev->lock); + tasklet_init(&lldev->rst_task, hidma_ll_abort, (unsigned long)lldev); + tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev); + lldev->initialized = 1; + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return lldev; +} + +int hidma_ll_uninit(struct hidma_lldev *lldev) +{ + u32 required_bytes; + int rc = 0; + u32 val; + + if (!lldev) + return -ENODEV; + + if (!lldev->initialized) + return 0; + + lldev->initialized = 0; + + required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres; + tasklet_kill(&lldev->task); + memset(lldev->trepool, 0, required_bytes); + lldev->trepool = NULL; + lldev->pending_tre_count = 0; + lldev->tre_write_offset = 0; + + rc = hidma_ll_reset(lldev); + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return rc; +} + +enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch) +{ + enum dma_status ret = DMA_ERROR; + struct hidma_tre *tre; + unsigned long flags; + u8 err_code; + + spin_lock_irqsave(&lldev->lock, flags); + + tre = &lldev->trepool[tre_ch]; + err_code = tre->err_code; + + if (err_code & HIDMA_EVRE_STATUS_COMPLETE) + ret = DMA_COMPLETE; + else if (err_code & HIDMA_EVRE_STATUS_ERROR) + ret = DMA_ERROR; + else + ret = DMA_IN_PROGRESS; + spin_unlock_irqrestore(&lldev->lock, flags); + + return ret; +} diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index ef491b893f40..c0e365321310 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine Management interface * - * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,13 +17,14 @@ #include <linux/acpi.h> #include <linux/of.h> #include <linux/property.h> -#include <linux/interrupt.h> -#include <linux/platform_device.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/bitops.h> +#include <linux/dma-mapping.h> #include "hidma_mgmt.h" @@ -298,5 +299,109 @@ static struct platform_driver hidma_mgmt_driver = { }, }; -module_platform_driver(hidma_mgmt_driver); +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) +static int object_counter; + +static int __init hidma_mgmt_of_populate_channels(struct device_node *np) +{ + struct platform_device *pdev_parent = of_find_device_by_node(np); + struct platform_device_info pdevinfo; + struct of_phandle_args out_irq; + struct device_node *child; + struct resource *res; + const __be32 *cell; + int ret = 0, size, i, num; + u64 addr, addr_size; + + for_each_available_child_of_node(np, child) { + struct resource *res_iter; + struct platform_device *new_pdev; + + cell = of_get_property(child, "reg", &size); + if (!cell) { + ret = -EINVAL; + goto out; + } + + size /= sizeof(*cell); + num = size / + (of_n_addr_cells(child) + of_n_size_cells(child)) + 1; + + /* allocate a resource array */ + res = kcalloc(num, sizeof(*res), GFP_KERNEL); + if (!res) { + ret = -ENOMEM; + goto out; + } + + /* read each reg value */ + i = 0; + res_iter = res; + while (i < size) { + addr = of_read_number(&cell[i], + of_n_addr_cells(child)); + i += of_n_addr_cells(child); + + addr_size = of_read_number(&cell[i], + of_n_size_cells(child)); + i += of_n_size_cells(child); + + res_iter->start = addr; + res_iter->end = res_iter->start + addr_size - 1; + res_iter->flags = IORESOURCE_MEM; + res_iter++; + } + + ret = of_irq_parse_one(child, 0, &out_irq); + if (ret) + goto out; + + res_iter->start = irq_create_of_mapping(&out_irq); + res_iter->name = "hidma event irq"; + res_iter->flags = IORESOURCE_IRQ; + + memset(&pdevinfo, 0, sizeof(pdevinfo)); + pdevinfo.fwnode = &child->fwnode; + pdevinfo.parent = pdev_parent ? &pdev_parent->dev : NULL; + pdevinfo.name = child->name; + pdevinfo.id = object_counter++; + pdevinfo.res = res; + pdevinfo.num_res = num; + pdevinfo.data = NULL; + pdevinfo.size_data = 0; + pdevinfo.dma_mask = DMA_BIT_MASK(64); + new_pdev = platform_device_register_full(&pdevinfo); + if (!new_pdev) { + ret = -ENODEV; + goto out; + } + of_dma_configure(&new_pdev->dev, child); + + kfree(res); + res = NULL; + } +out: + kfree(res); + + return ret; +} +#endif + +static int __init hidma_mgmt_init(void) +{ +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) + struct device_node *child; + + for (child = of_find_matching_node(NULL, hidma_mgmt_match); child; + child = of_find_matching_node(child, hidma_mgmt_match)) { + /* device tree based firmware here */ + hidma_mgmt_of_populate_channels(child); + of_node_put(child); + } +#endif + platform_driver_register(&hidma_mgmt_driver); + + return 0; +} +module_init(hidma_mgmt_init); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 2db12e493c53..5065ca43face 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -146,6 +146,8 @@ struct sun6i_vchan { struct dma_slave_config cfg; struct sun6i_pchan *phy; u8 port; + u8 irq_type; + bool cyclic; }; struct sun6i_dma_dev { @@ -254,6 +256,30 @@ static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width) return addr_width >> 1; } +static size_t sun6i_get_chan_size(struct sun6i_pchan *pchan) +{ + struct sun6i_desc *txd = pchan->desc; + struct sun6i_dma_lli *lli; + size_t bytes; + dma_addr_t pos; + + pos = readl(pchan->base + DMA_CHAN_LLI_ADDR); + bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + + if (pos == LLI_LAST_ITEM) + return bytes; + + for (lli = txd->v_lli; lli; lli = lli->v_lli_next) { + if (lli->p_lli_next == pos) { + for (lli = lli->v_lli_next; lli; lli = lli->v_lli_next) + bytes += lli->len; + break; + } + } + + return bytes; +} + static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, struct sun6i_dma_lli *next, dma_addr_t next_phy, @@ -276,45 +302,6 @@ static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, return next; } -static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli, - dma_addr_t src, - dma_addr_t dst, u32 len, - struct dma_slave_config *config) -{ - u8 src_width, dst_width, src_burst, dst_burst; - - if (!config) - return -EINVAL; - - src_burst = convert_burst(config->src_maxburst); - if (src_burst) - return src_burst; - - dst_burst = convert_burst(config->dst_maxburst); - if (dst_burst) - return dst_burst; - - src_width = convert_buswidth(config->src_addr_width); - if (src_width) - return src_width; - - dst_width = convert_buswidth(config->dst_addr_width); - if (dst_width) - return dst_width; - - lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | - DMA_CHAN_CFG_SRC_WIDTH(src_width) | - DMA_CHAN_CFG_DST_BURST(dst_burst) | - DMA_CHAN_CFG_DST_WIDTH(dst_width); - - lli->src = src; - lli->dst = dst; - lli->len = len; - lli->para = NORMAL_WAIT; - - return 0; -} - static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan, struct sun6i_dma_lli *lli) { @@ -381,9 +368,13 @@ static int sun6i_dma_start_desc(struct sun6i_vchan *vchan) irq_reg = pchan->idx / DMA_IRQ_CHAN_NR; irq_offset = pchan->idx % DMA_IRQ_CHAN_NR; - irq_val = readl(sdev->base + DMA_IRQ_EN(irq_offset)); - irq_val |= DMA_IRQ_QUEUE << (irq_offset * DMA_IRQ_CHAN_WIDTH); - writel(irq_val, sdev->base + DMA_IRQ_EN(irq_offset)); + vchan->irq_type = vchan->cyclic ? DMA_IRQ_PKG : DMA_IRQ_QUEUE; + + irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg)); + irq_val &= ~((DMA_IRQ_HALF | DMA_IRQ_PKG | DMA_IRQ_QUEUE) << + (irq_offset * DMA_IRQ_CHAN_WIDTH)); + irq_val |= vchan->irq_type << (irq_offset * DMA_IRQ_CHAN_WIDTH); + writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg)); writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR); writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE); @@ -479,11 +470,12 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) writel(status, sdev->base + DMA_IRQ_STAT(i)); for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) { - if (status & DMA_IRQ_QUEUE) { - pchan = sdev->pchans + j; - vchan = pchan->vchan; - - if (vchan) { + pchan = sdev->pchans + j; + vchan = pchan->vchan; + if (vchan && (status & vchan->irq_type)) { + if (vchan->cyclic) { + vchan_cyclic_callback(&pchan->desc->vd); + } else { spin_lock(&vchan->vc.lock); vchan_cookie_complete(&pchan->desc->vd); pchan->done = pchan->desc; @@ -502,6 +494,55 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) return ret; } +static int set_config(struct sun6i_dma_dev *sdev, + struct dma_slave_config *sconfig, + enum dma_transfer_direction direction, + u32 *p_cfg) +{ + s8 src_width, dst_width, src_burst, dst_burst; + + switch (direction) { + case DMA_MEM_TO_DEV: + src_burst = convert_burst(sconfig->src_maxburst ? + sconfig->src_maxburst : 8); + src_width = convert_buswidth(sconfig->src_addr_width != + DMA_SLAVE_BUSWIDTH_UNDEFINED ? + sconfig->src_addr_width : + DMA_SLAVE_BUSWIDTH_4_BYTES); + dst_burst = convert_burst(sconfig->dst_maxburst); + dst_width = convert_buswidth(sconfig->dst_addr_width); + break; + case DMA_DEV_TO_MEM: + src_burst = convert_burst(sconfig->src_maxburst); + src_width = convert_buswidth(sconfig->src_addr_width); + dst_burst = convert_burst(sconfig->dst_maxburst ? + sconfig->dst_maxburst : 8); + dst_width = convert_buswidth(sconfig->dst_addr_width != + DMA_SLAVE_BUSWIDTH_UNDEFINED ? + sconfig->dst_addr_width : + DMA_SLAVE_BUSWIDTH_4_BYTES); + break; + default: + return -EINVAL; + } + + if (src_burst < 0) + return src_burst; + if (src_width < 0) + return src_width; + if (dst_burst < 0) + return dst_burst; + if (dst_width < 0) + return dst_width; + + *p_cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | + DMA_CHAN_CFG_SRC_WIDTH(src_width) | + DMA_CHAN_CFG_DST_BURST(dst_burst) | + DMA_CHAN_CFG_DST_WIDTH(dst_width); + + return 0; +} + static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -569,13 +610,15 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( struct sun6i_desc *txd; struct scatterlist *sg; dma_addr_t p_lli; + u32 lli_cfg; int i, ret; if (!sgl) return NULL; - if (!is_slave_direction(dir)) { - dev_err(chan2dev(chan), "Invalid DMA direction\n"); + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); return NULL; } @@ -588,14 +631,14 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( if (!v_lli) goto err_lli_free; - if (dir == DMA_MEM_TO_DEV) { - ret = sun6i_dma_cfg_lli(v_lli, sg_dma_address(sg), - sconfig->dst_addr, sg_dma_len(sg), - sconfig); - if (ret) - goto err_cur_lli_free; + v_lli->len = sg_dma_len(sg); + v_lli->para = NORMAL_WAIT; - v_lli->cfg |= DMA_CHAN_CFG_DST_IO_MODE | + if (dir == DMA_MEM_TO_DEV) { + v_lli->src = sg_dma_address(sg); + v_lli->dst = sconfig->dst_addr; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_IO_MODE | DMA_CHAN_CFG_SRC_LINEAR_MODE | DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(vchan->port); @@ -607,13 +650,10 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( sg_dma_len(sg), flags); } else { - ret = sun6i_dma_cfg_lli(v_lli, sconfig->src_addr, - sg_dma_address(sg), sg_dma_len(sg), - sconfig); - if (ret) - goto err_cur_lli_free; - - v_lli->cfg |= DMA_CHAN_CFG_DST_LINEAR_MODE | + v_lli->src = sconfig->src_addr; + v_lli->dst = sg_dma_address(sg); + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_LINEAR_MODE | DMA_CHAN_CFG_SRC_IO_MODE | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_SRC_DRQ(vchan->port); @@ -634,8 +674,78 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( return vchan_tx_prep(&vchan->vc, &txd->vd, flags); -err_cur_lli_free: - dma_pool_free(sdev->pool, v_lli, p_lli); +err_lli_free: + for (prev = txd->v_lli; prev; prev = prev->v_lli_next) + dma_pool_free(sdev->pool, prev, virt_to_phys(prev)); + kfree(txd); + return NULL; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic( + struct dma_chan *chan, + dma_addr_t buf_addr, + size_t buf_len, + size_t period_len, + enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli, *prev = NULL; + struct sun6i_desc *txd; + dma_addr_t p_lli; + u32 lli_cfg; + unsigned int i, periods = buf_len / period_len; + int ret; + + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); + return NULL; + } + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + for (i = 0; i < periods; i++) { + v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli); + if (!v_lli) { + dev_err(sdev->slave.dev, "Failed to alloc lli memory\n"); + goto err_lli_free; + } + + v_lli->len = period_len; + v_lli->para = NORMAL_WAIT; + + if (dir == DMA_MEM_TO_DEV) { + v_lli->src = buf_addr + period_len * i; + v_lli->dst = sconfig->dst_addr; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_IO_MODE | + DMA_CHAN_CFG_SRC_LINEAR_MODE | + DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_DRQ(vchan->port); + } else { + v_lli->src = sconfig->src_addr; + v_lli->dst = buf_addr + period_len * i; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_LINEAR_MODE | + DMA_CHAN_CFG_SRC_IO_MODE | + DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_SRC_DRQ(vchan->port); + } + + prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd); + } + + prev->p_lli_next = txd->p_lli; /* cyclic list */ + + vchan->cyclic = true; + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + err_lli_free: for (prev = txd->v_lli; prev; prev = prev->v_lli_next) dma_pool_free(sdev->pool, prev, virt_to_phys(prev)); @@ -712,6 +822,16 @@ static int sun6i_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&vchan->vc.lock, flags); + if (vchan->cyclic) { + vchan->cyclic = false; + if (pchan && pchan->desc) { + struct virt_dma_desc *vd = &pchan->desc->vd; + struct virt_dma_chan *vc = &vchan->vc; + + list_add_tail(&vd->node, &vc->desc_completed); + } + } + vchan_get_all_descriptors(&vchan->vc, &head); if (pchan) { @@ -759,7 +879,7 @@ static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan, } else if (!pchan || !pchan->desc) { bytes = 0; } else { - bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + bytes = sun6i_get_chan_size(pchan); } spin_unlock_irqrestore(&vchan->vc.lock, flags); @@ -963,6 +1083,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask); dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask); dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, sdc->slave.cap_mask); INIT_LIST_HEAD(&sdc->slave.channels); sdc->slave.device_free_chan_resources = sun6i_dma_free_chan_resources; @@ -970,6 +1091,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; + sdc->slave.device_prep_dma_cyclic = sun6i_dma_prep_dma_cyclic; sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; |