diff options
author | Youri Querry <youri.querry_1@nxp.com> | 2019-12-12 20:01:15 +0300 |
---|---|---|
committer | Li Yang <leoyang.li@nxp.com> | 2020-02-20 03:06:16 +0300 |
commit | b46fe745e4f6102fa944383f87f5d8820398f4ad (patch) | |
tree | 1a917b9467a30d0088d4c2c3266229bc5d464947 /drivers/soc/fsl/dpio/qbman-portal.c | |
parent | 9d98809711ae0ebcfb8115a0bc54604c59908710 (diff) | |
download | linux-b46fe745e4f6102fa944383f87f5d8820398f4ad.tar.xz |
soc: fsl: dpio: QMAN performance improvement with function pointer indirection
We are making the access decision in the initialization and
setting the function pointers accordingly.
Signed-off-by: Youri Querry <youri.querry_1@nxp.com>
Acked-by: Roy Pledge <roy.pledge@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
Diffstat (limited to 'drivers/soc/fsl/dpio/qbman-portal.c')
-rw-r--r-- | drivers/soc/fsl/dpio/qbman-portal.c | 451 |
1 files changed, 389 insertions, 62 deletions
diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index 5a37ac8a171c..0ffe018afb17 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -83,6 +83,82 @@ enum qbman_sdqcr_fc { qbman_sdqcr_fc_up_to_3 = 1 }; +/* Internal Function declaration */ +static int qbman_swp_enqueue_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd); +static int qbman_swp_enqueue_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd); +static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + uint32_t *flags, + int num_frames); +static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + uint32_t *flags, + int num_frames); +static int +qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + int num_frames); +static +int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + int num_frames); +static int qbman_swp_pull_direct(struct qbman_swp *s, + struct qbman_pull_desc *d); +static int qbman_swp_pull_mem_back(struct qbman_swp *s, + struct qbman_pull_desc *d); + +const struct dpaa2_dq *qbman_swp_dqrr_next_direct(struct qbman_swp *s); +const struct dpaa2_dq *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s); + +static int qbman_swp_release_direct(struct qbman_swp *s, + const struct qbman_release_desc *d, + const u64 *buffers, + unsigned int num_buffers); +static int qbman_swp_release_mem_back(struct qbman_swp *s, + const struct qbman_release_desc *d, + const u64 *buffers, + unsigned int num_buffers); + +/* Function pointers */ +int (*qbman_swp_enqueue_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd) + = qbman_swp_enqueue_direct; + +int (*qbman_swp_enqueue_multiple_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + uint32_t *flags, + int num_frames) + = qbman_swp_enqueue_multiple_direct; + +int +(*qbman_swp_enqueue_multiple_desc_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + int num_frames) + = qbman_swp_enqueue_multiple_desc_direct; + +int (*qbman_swp_pull_ptr)(struct qbman_swp *s, struct qbman_pull_desc *d) + = qbman_swp_pull_direct; + +const struct dpaa2_dq *(*qbman_swp_dqrr_next_ptr)(struct qbman_swp *s) + = qbman_swp_dqrr_next_direct; + +int (*qbman_swp_release_ptr)(struct qbman_swp *s, + const struct qbman_release_desc *d, + const u64 *buffers, + unsigned int num_buffers) + = qbman_swp_release_direct; + /* Portal Access */ static inline u32 qbman_read_register(struct qbman_swp *p, u32 offset) @@ -218,6 +294,19 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) * applied when dequeues from a specific channel are enabled. */ qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0); + + if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) { + qbman_swp_enqueue_ptr = + qbman_swp_enqueue_mem_back; + qbman_swp_enqueue_multiple_ptr = + qbman_swp_enqueue_multiple_mem_back; + qbman_swp_enqueue_multiple_desc_ptr = + qbman_swp_enqueue_multiple_desc_mem_back; + qbman_swp_pull_ptr = qbman_swp_pull_mem_back; + qbman_swp_dqrr_next_ptr = qbman_swp_dqrr_next_mem_back; + qbman_swp_release_ptr = qbman_swp_release_mem_back; + } + return p; } @@ -447,7 +536,7 @@ static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p, } /** - * qbman_swp_enqueue() - Issue an enqueue command + * qbman_swp_enqueue_direct() - Issue an enqueue command * @s: the software portal used for enqueue * @d: the enqueue descriptor * @fd: the frame descriptor to be enqueued @@ -457,7 +546,7 @@ static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p, * * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready. */ -int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d, +int qbman_swp_enqueue_direct(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct dpaa2_fd *fd) { struct qbman_eq_desc_with_fd *p; @@ -480,21 +569,57 @@ int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d, memcpy(&p->desc.tgtid, &d->tgtid, 24); memcpy(&p->fd, fd, sizeof(*fd)); - if ((s->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) { - /* Set the verb byte, have to substitute in the valid-bit */ - dma_wmb(); - p->desc.verb = d->verb | EQAR_VB(eqar); - } else { - p->desc.verb = d->verb | EQAR_VB(eqar); - dma_wmb(); - qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar)); - } + /* Set the verb byte, have to substitute in the valid-bit */ + dma_wmb(); + p->desc.verb = d->verb | EQAR_VB(eqar); return 0; } /** - * qbman_swp_enqueue_multiple() - Issue a multi enqueue command + * qbman_swp_enqueue_mem_back() - Issue an enqueue command + * @s: the software portal used for enqueue + * @d: the enqueue descriptor + * @fd: the frame descriptor to be enqueued + * + * Please note that 'fd' should only be NULL if the "action" of the + * descriptor is "orp_hole" or "orp_nesn". + * + * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready. + */ +int qbman_swp_enqueue_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd) +{ + struct qbman_eq_desc_with_fd *p; + u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR); + + if (!EQAR_SUCCESS(eqar)) + return -EBUSY; + + p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); + /* This is mapped as DEVICE type memory, writes are + * with address alignment: + * desc.dca address alignment = 1 + * desc.seqnum address alignment = 2 + * desc.orpid address alignment = 4 + * desc.tgtid address alignment = 8 + */ + p->desc.dca = d->dca; + p->desc.seqnum = d->seqnum; + p->desc.orpid = d->orpid; + memcpy(&p->desc.tgtid, &d->tgtid, 24); + memcpy(&p->fd, fd, sizeof(*fd)); + + p->desc.verb = d->verb | EQAR_VB(eqar); + dma_wmb(); + qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar)); + + return 0; +} + +/** + * qbman_swp_enqueue_multiple_direct() - Issue a multi enqueue command * using one enqueue descriptor * @s: the software portal used for enqueue * @d: the enqueue descriptor @@ -504,16 +629,44 @@ int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d, * * Return the number of fd enqueued, or a negative error number. */ -int qbman_swp_enqueue_multiple(struct qbman_swp *s, - const struct qbman_eq_desc *d, - const struct dpaa2_fd *fd, - uint32_t *flags, - int num_frames) +int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + uint32_t *flags, + int num_frames) +{ + int count = 0; + + while (count < num_frames) { + if (qbman_swp_enqueue_direct(s, d, fd) != 0) + break; + count++; + } + + return count; +} + +/** + * qbman_swp_enqueue_multiple_mem_back() - Issue a multi enqueue command + * using one enqueue descriptor + * @s: the software portal used for enqueue + * @d: the enqueue descriptor + * @fd: table pointer of frame descriptor table to be enqueued + * @flags: table pointer of flags, not used for the moment + * @num_frames: number of fd to be enqueued + * + * Return the number of fd enqueued, or a negative error number. + */ +int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + uint32_t *flags, + int num_frames) { int count = 0; while (count < num_frames) { - if (qbman_swp_enqueue(s, d, fd) != 0) + if (qbman_swp_enqueue_mem_back(s, d, fd) != 0) break; count++; } @@ -522,7 +675,7 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s, } /** - * qbman_swp_enqueue_multiple_desc() - Issue a multi enqueue command + * qbman_swp_enqueue_multiple_desc_direct() - Issue a multi enqueue command * using multiple enqueue descriptor * @s: the software portal used for enqueue * @d: table of minimal enqueue descriptor @@ -531,15 +684,41 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s, * * Return the number of fd enqueued, or a negative error number. */ -int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s, - const struct qbman_eq_desc *d, - const struct dpaa2_fd *fd, - int num_frames) +int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + int num_frames) +{ + int count = 0; + + while (count < num_frames) { + if (qbman_swp_enqueue_direct(s, &(d[count]), fd) != 0) + break; + count++; + } + + return count; +} + +/** + * qbman_swp_enqueue_multiple_desc_mem_back() - Issue a multi enqueue command + * using multiple enqueue descriptor + * @s: the software portal used for enqueue + * @d: table of minimal enqueue descriptor + * @fd: table pointer of frame descriptor table to be enqueued + * @num_frames: number of fd to be enqueued + * + * Return the number of fd enqueued, or a negative error number. + */ +int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + int num_frames) { int count = 0; while (count < num_frames) { - if (qbman_swp_enqueue(s, &(d[count]), fd) != 0) + if (qbman_swp_enqueue_mem_back(s, &(d[count]), fd) != 0) break; count++; } @@ -702,7 +881,7 @@ void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, u32 chid, } /** - * qbman_swp_pull() - Issue the pull dequeue command + * qbman_swp_pull_direct() - Issue the pull dequeue command * @s: the software portal object * @d: the software portal descriptor which has been configured with * the set of qbman_pull_desc_set_*() calls @@ -710,7 +889,7 @@ void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, u32 chid, * Return 0 for success, and -EBUSY if the software portal is not ready * to do pull dequeue. */ -int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) +int qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d) { struct qbman_pull_desc *p; @@ -728,18 +907,45 @@ int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) p->dq_src = d->dq_src; p->rsp_addr = d->rsp_addr; p->rsp_addr_virt = d->rsp_addr_virt; + dma_wmb(); + /* Set the verb byte, have to substitute in the valid-bit */ + p->verb = d->verb | s->vdq.valid_bit; + s->vdq.valid_bit ^= QB_VALID_BIT; - if ((s->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) { - dma_wmb(); - /* Set the verb byte, have to substitute in the valid-bit */ - p->verb = d->verb | s->vdq.valid_bit; - s->vdq.valid_bit ^= QB_VALID_BIT; - } else { - p->verb = d->verb | s->vdq.valid_bit; - s->vdq.valid_bit ^= QB_VALID_BIT; - dma_wmb(); - qbman_write_register(s, QBMAN_CINH_SWP_VDQCR_RT, QMAN_RT_MODE); + return 0; +} + +/** + * qbman_swp_pull_mem_back() - Issue the pull dequeue command + * @s: the software portal object + * @d: the software portal descriptor which has been configured with + * the set of qbman_pull_desc_set_*() calls + * + * Return 0 for success, and -EBUSY if the software portal is not ready + * to do pull dequeue. + */ +int qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d) +{ + struct qbman_pull_desc *p; + + if (!atomic_dec_and_test(&s->vdq.available)) { + atomic_inc(&s->vdq.available); + return -EBUSY; } + s->vdq.storage = (void *)(uintptr_t)d->rsp_addr_virt; + if ((s->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) + p = qbman_get_cmd(s, QBMAN_CENA_SWP_VDQCR); + else + p = qbman_get_cmd(s, QBMAN_CENA_SWP_VDQCR_MEM); + p->numf = d->numf; + p->tok = QMAN_DQ_TOKEN_VALID; + p->dq_src = d->dq_src; + p->rsp_addr = d->rsp_addr; + p->rsp_addr_virt = d->rsp_addr_virt; + p->verb = d->verb | s->vdq.valid_bit; + s->vdq.valid_bit ^= QB_VALID_BIT; + dma_wmb(); + qbman_write_register(s, QBMAN_CINH_SWP_VDQCR_RT, QMAN_RT_MODE); return 0; } @@ -747,14 +953,14 @@ int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) #define QMAN_DQRR_PI_MASK 0xf /** - * qbman_swp_dqrr_next() - Get an valid DQRR entry + * qbman_swp_dqrr_next_direct() - Get an valid DQRR entry * @s: the software portal object * * Return NULL if there are no unconsumed DQRR entries. Return a DQRR entry * only once, so repeated calls can return a sequence of DQRR entries, without * requiring they be consumed immediately or in any particular order. */ -const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s) +const struct dpaa2_dq *qbman_swp_dqrr_next_direct(struct qbman_swp *s) { u32 verb; u32 response_verb; @@ -797,10 +1003,99 @@ const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s) QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx))); } - if ((s->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) - p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); - else - p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR_MEM(s->dqrr.next_idx)); + p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); + verb = p->dq.verb; + + /* + * If the valid-bit isn't of the expected polarity, nothing there. Note, + * in the DQRR reset bug workaround, we shouldn't need to skip these + * check, because we've already determined that a new entry is available + * and we've invalidated the cacheline before reading it, so the + * valid-bit behaviour is repaired and should tell us what we already + * knew from reading PI. + */ + if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) { + prefetch(qbman_get_cmd(s, + QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx))); + return NULL; + } + /* + * There's something there. Move "next_idx" attention to the next ring + * entry (and prefetch it) before returning what we found. + */ + s->dqrr.next_idx++; + s->dqrr.next_idx &= s->dqrr.dqrr_size - 1; /* Wrap around */ + if (!s->dqrr.next_idx) + s->dqrr.valid_bit ^= QB_VALID_BIT; + + /* + * If this is the final response to a volatile dequeue command + * indicate that the vdq is available + */ + flags = p->dq.stat; + response_verb = verb & QBMAN_RESULT_MASK; + if ((response_verb == QBMAN_RESULT_DQ) && + (flags & DPAA2_DQ_STAT_VOLATILE) && + (flags & DPAA2_DQ_STAT_EXPIRED)) + atomic_inc(&s->vdq.available); + + prefetch(qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx))); + + return p; +} + +/** + * qbman_swp_dqrr_next_mem_back() - Get an valid DQRR entry + * @s: the software portal object + * + * Return NULL if there are no unconsumed DQRR entries. Return a DQRR entry + * only once, so repeated calls can return a sequence of DQRR entries, without + * requiring they be consumed immediately or in any particular order. + */ +const struct dpaa2_dq *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s) +{ + u32 verb; + u32 response_verb; + u32 flags; + struct dpaa2_dq *p; + + /* Before using valid-bit to detect if something is there, we have to + * handle the case of the DQRR reset bug... + */ + if (unlikely(s->dqrr.reset_bug)) { + /* + * We pick up new entries by cache-inhibited producer index, + * which means that a non-coherent mapping would require us to + * invalidate and read *only* once that PI has indicated that + * there's an entry here. The first trip around the DQRR ring + * will be much less efficient than all subsequent trips around + * it... + */ + u8 pi = qbman_read_register(s, QBMAN_CINH_SWP_DQPI) & + QMAN_DQRR_PI_MASK; + + /* there are new entries if pi != next_idx */ + if (pi == s->dqrr.next_idx) + return NULL; + + /* + * if next_idx is/was the last ring index, and 'pi' is + * different, we can disable the workaround as all the ring + * entries have now been DMA'd to so valid-bit checking is + * repaired. Note: this logic needs to be based on next_idx + * (which increments one at a time), rather than on pi (which + * can burst and wrap-around between our snapshots of it). + */ + if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1)) { + pr_debug("next_idx=%d, pi=%d, clear reset bug\n", + s->dqrr.next_idx, pi); + s->dqrr.reset_bug = 0; + } + prefetch(qbman_get_cmd(s, + QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx))); + } + + p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR_MEM(s->dqrr.next_idx)); verb = p->dq.verb; /* @@ -929,7 +1224,7 @@ void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable) #define RAR_SUCCESS(rar) ((rar) & 0x100) /** - * qbman_swp_release() - Issue a buffer release command + * qbman_swp_release_direct() - Issue a buffer release command * @s: the software portal object * @d: the release descriptor * @buffers: a pointer pointing to the buffer address to be released @@ -937,8 +1232,9 @@ void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable) * * Return 0 for success, -EBUSY if the release command ring is not ready. */ -int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d, - const u64 *buffers, unsigned int num_buffers) +int qbman_swp_release_direct(struct qbman_swp *s, + const struct qbman_release_desc *d, + const u64 *buffers, unsigned int num_buffers) { int i; struct qbman_release_desc *p; @@ -952,28 +1248,59 @@ int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d, return -EBUSY; /* Start the release command */ - if ((s->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) - p = qbman_get_cmd(s, QBMAN_CENA_SWP_RCR(RAR_IDX(rar))); - else - p = qbman_get_cmd(s, QBMAN_CENA_SWP_RCR_MEM(RAR_IDX(rar))); + p = qbman_get_cmd(s, QBMAN_CENA_SWP_RCR(RAR_IDX(rar))); + /* Copy the caller's buffer pointers to the command */ for (i = 0; i < num_buffers; i++) p->buf[i] = cpu_to_le64(buffers[i]); p->bpid = d->bpid; - if ((s->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) { - /* - * Set the verb byte, have to substitute in the valid-bit - * and the number of buffers. - */ - dma_wmb(); - p->verb = d->verb | RAR_VB(rar) | num_buffers; - } else { - p->verb = d->verb | RAR_VB(rar) | num_buffers; - dma_wmb(); - qbman_write_register(s, QBMAN_CINH_SWP_RCR_AM_RT + - RAR_IDX(rar) * 4, QMAN_RT_MODE); - } + /* + * Set the verb byte, have to substitute in the valid-bit + * and the number of buffers. + */ + dma_wmb(); + p->verb = d->verb | RAR_VB(rar) | num_buffers; + + return 0; +} + +/** + * qbman_swp_release_mem_back() - Issue a buffer release command + * @s: the software portal object + * @d: the release descriptor + * @buffers: a pointer pointing to the buffer address to be released + * @num_buffers: number of buffers to be released, must be less than 8 + * + * Return 0 for success, -EBUSY if the release command ring is not ready. + */ +int qbman_swp_release_mem_back(struct qbman_swp *s, + const struct qbman_release_desc *d, + const u64 *buffers, unsigned int num_buffers) +{ + int i; + struct qbman_release_desc *p; + u32 rar; + + if (!num_buffers || (num_buffers > 7)) + return -EINVAL; + + rar = qbman_read_register(s, QBMAN_CINH_SWP_RAR); + if (!RAR_SUCCESS(rar)) + return -EBUSY; + + /* Start the release command */ + p = qbman_get_cmd(s, QBMAN_CENA_SWP_RCR_MEM(RAR_IDX(rar))); + + /* Copy the caller's buffer pointers to the command */ + for (i = 0; i < num_buffers; i++) + p->buf[i] = cpu_to_le64(buffers[i]); + p->bpid = d->bpid; + + p->verb = d->verb | RAR_VB(rar) | num_buffers; + dma_wmb(); + qbman_write_register(s, QBMAN_CINH_SWP_RCR_AM_RT + + RAR_IDX(rar) * 4, QMAN_RT_MODE); return 0; } |