summaryrefslogtreecommitdiff
path: root/drivers/net/wireless/ath/ath10k/htt_tx.c
diff options
context:
space:
mode:
authorMichal Kazior <michal.kazior@tieto.com>2014-02-27 20:50:04 +0400
committerKalle Valo <kvalo@qca.qualcomm.com>2014-02-28 13:59:17 +0400
commita16942e63008875a6a057ea2973becd261ed0c4e (patch)
tree0ae24cfe0f981873f15196bc1b72cff2b0d26ab3 /drivers/net/wireless/ath/ath10k/htt_tx.c
parent726346fc713498a84c4d591fce9f1fbe38bf2a44 (diff)
downloadlinux-a16942e63008875a6a057ea2973becd261ed0c4e.tar.xz
ath10k: bypass htc for htt tx path
Going through full htc tx path for htt tx is a waste of resources. By skipping it it's possible to easily submit scatter-gather to the pci hif for reduced host cpu load and improved performance. The new approach uses dma pool to store the following metadata for each tx request: * msdu fragment list * htc header * htt tx command The htt tx command contains a msdu prefetch. Instead of copying it original mapped msdu address is used to submit a second scatter-gather item to hif to make a complete htt tx command. The htt tx command itself hands over dma mapped pointers to msdus and completion of the command itself doesn't mean the frame has been sent and can be unmapped/freed. This is why htc tx completion is skipped for htt tx as all tx related resources are freed upon htt tx completion indication event (which also implicitly means htt tx command itself was completed). Since now each htt tx request effectively consists of 2 copy engine items CE_HTT_H2T_MSG_SRC_NENTRIES is updated to allow maximum of TARGET_10X_NUM_MSDU_DESC msdus being queued. This keeps the tx path resource management simple. Signed-off-by: Michal Kazior <michal.kazior@tieto.com> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Diffstat (limited to 'drivers/net/wireless/ath/ath10k/htt_tx.c')
-rw-r--r--drivers/net/wireless/ath/ath10k/htt_tx.c190
1 files changed, 101 insertions, 89 deletions
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index f5960c593810..20b7a446a9f8 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -109,6 +109,14 @@ int ath10k_htt_tx_attach(struct ath10k_htt *htt)
return -ENOMEM;
}
+ htt->tx_pool = dma_pool_create("ath10k htt tx pool", htt->ar->dev,
+ sizeof(struct ath10k_htt_txbuf), 4, 0);
+ if (!htt->tx_pool) {
+ kfree(htt->used_msdu_ids);
+ kfree(htt->pending_tx);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -139,6 +147,7 @@ void ath10k_htt_tx_detach(struct ath10k_htt *htt)
ath10k_htt_tx_cleanup_pending(htt);
kfree(htt->pending_tx);
kfree(htt->used_msdu_ids);
+ dma_pool_destroy(htt->tx_pool);
return;
}
@@ -350,8 +359,7 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
memcpy(cmd->mgmt_tx.hdr, msdu->data,
min_t(int, msdu->len, HTT_MGMT_FRM_HDR_DOWNLOAD_LEN));
- skb_cb->htt.frag_len = 0;
- skb_cb->htt.pad_len = 0;
+ skb_cb->htt.txbuf = NULL;
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
@@ -377,19 +385,19 @@ err:
int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
{
struct device *dev = htt->ar->dev;
- struct htt_cmd *cmd;
- struct htt_data_tx_desc_frag *tx_frags;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data;
struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu);
- struct sk_buff *txdesc = NULL;
- bool use_frags;
- u8 vdev_id = ATH10K_SKB_CB(msdu)->vdev_id;
- u8 tid;
- int prefetch_len, desc_len;
- int msdu_id = -1;
+ struct ath10k_hif_sg_item sg_items[2];
+ struct htt_data_tx_desc_frag *frags;
+ u8 vdev_id = skb_cb->vdev_id;
+ u8 tid = skb_cb->htt.tid;
+ int prefetch_len;
int res;
- u8 flags0;
- u16 flags1;
+ u8 flags0 = 0;
+ u16 msdu_id, flags1 = 0;
+ dma_addr_t paddr;
+ u32 frags_paddr;
+ bool use_frags;
res = ath10k_htt_tx_inc_pending(htt);
if (res)
@@ -408,105 +416,109 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
prefetch_len = min(htt->prefetch_len, msdu->len);
prefetch_len = roundup(prefetch_len, 4);
- desc_len = sizeof(cmd->hdr) + sizeof(cmd->data_tx) + prefetch_len;
-
- txdesc = ath10k_htc_alloc_skb(desc_len);
- if (!txdesc) {
- res = -ENOMEM;
- goto err_free_msdu_id;
- }
-
/* Since HTT 3.0 there is no separate mgmt tx command. However in case
* of mgmt tx using TX_FRM there is not tx fragment list. Instead of tx
* fragment list host driver specifies directly frame pointer. */
use_frags = htt->target_version_major < 3 ||
!ieee80211_is_mgmt(hdr->frame_control);
- if (!IS_ALIGNED((unsigned long)txdesc->data, 4)) {
- ath10k_warn("htt alignment check failed. dropping packet.\n");
- res = -EIO;
- goto err_free_txdesc;
- }
-
- if (use_frags) {
- skb_cb->htt.frag_len = sizeof(*tx_frags) * 2;
- skb_cb->htt.pad_len = (unsigned long)msdu->data -
- round_down((unsigned long)msdu->data, 4);
-
- skb_push(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
- } else {
- skb_cb->htt.frag_len = 0;
- skb_cb->htt.pad_len = 0;
- }
+ skb_cb->htt.txbuf = dma_pool_alloc(htt->tx_pool, GFP_ATOMIC,
+ &paddr);
+ if (!skb_cb->htt.txbuf)
+ goto err_free_msdu_id;
+ skb_cb->htt.txbuf_paddr = paddr;
skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len,
DMA_TO_DEVICE);
res = dma_mapping_error(dev, skb_cb->paddr);
if (res)
- goto err_pull_txfrag;
-
- if (use_frags) {
- dma_sync_single_for_cpu(dev, skb_cb->paddr, msdu->len,
- DMA_TO_DEVICE);
-
- /* tx fragment list must be terminated with zero-entry */
- tx_frags = (struct htt_data_tx_desc_frag *)msdu->data;
- tx_frags[0].paddr = __cpu_to_le32(skb_cb->paddr +
- skb_cb->htt.frag_len +
- skb_cb->htt.pad_len);
- tx_frags[0].len = __cpu_to_le32(msdu->len -
- skb_cb->htt.frag_len -
- skb_cb->htt.pad_len);
- tx_frags[1].paddr = __cpu_to_le32(0);
- tx_frags[1].len = __cpu_to_le32(0);
-
- dma_sync_single_for_device(dev, skb_cb->paddr, msdu->len,
- DMA_TO_DEVICE);
- }
+ goto err_free_txbuf;
- ath10k_dbg(ATH10K_DBG_HTT, "tx-msdu 0x%llx\n",
- (unsigned long long) ATH10K_SKB_CB(msdu)->paddr);
- ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "tx-msdu: ",
- msdu->data, msdu->len);
+ if (likely(use_frags)) {
+ frags = skb_cb->htt.txbuf->frags;
- skb_put(txdesc, desc_len);
- cmd = (struct htt_cmd *)txdesc->data;
+ frags[0].paddr = __cpu_to_le32(skb_cb->paddr);
+ frags[0].len = __cpu_to_le32(msdu->len);
+ frags[1].paddr = 0;
+ frags[1].len = 0;
+
+ flags0 |= SM(ATH10K_HW_TXRX_NATIVE_WIFI,
+ HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
- tid = ATH10K_SKB_CB(msdu)->htt.tid;
+ frags_paddr = skb_cb->htt.txbuf_paddr;
+ } else {
+ flags0 |= SM(ATH10K_HW_TXRX_MGMT,
+ HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
- ath10k_dbg(ATH10K_DBG_HTT, "htt data tx using tid %hhu\n", tid);
+ frags_paddr = skb_cb->paddr;
+ }
+
+ /* Normally all commands go through HTC which manages tx credits for
+ * each endpoint and notifies when tx is completed.
+ *
+ * HTT endpoint is creditless so there's no need to care about HTC
+ * flags. In that case it is trivial to fill the HTC header here.
+ *
+ * MSDU transmission is considered completed upon HTT event. This
+ * implies no relevant resources can be freed until after the event is
+ * received. That's why HTC tx completion handler itself is ignored by
+ * setting NULL to transfer_context for all sg items.
+ *
+ * There is simply no point in pushing HTT TX_FRM through HTC tx path
+ * as it's a waste of resources. By bypassing HTC it is possible to
+ * avoid extra memory allocations, compress data structures and thus
+ * improve performance. */
+
+ skb_cb->htt.txbuf->htc_hdr.eid = htt->eid;
+ skb_cb->htt.txbuf->htc_hdr.len = __cpu_to_le16(
+ sizeof(skb_cb->htt.txbuf->cmd_hdr) +
+ sizeof(skb_cb->htt.txbuf->cmd_tx) +
+ prefetch_len);
+ skb_cb->htt.txbuf->htc_hdr.flags = 0;
- flags0 = 0;
if (!ieee80211_has_protected(hdr->frame_control))
flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT;
- flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
- if (use_frags)
- flags0 |= SM(ATH10K_HW_TXRX_NATIVE_WIFI,
- HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
- else
- flags0 |= SM(ATH10K_HW_TXRX_MGMT,
- HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
+ flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
- flags1 = 0;
flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID);
flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID);
flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD;
flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD;
- cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM;
- cmd->data_tx.flags0 = flags0;
- cmd->data_tx.flags1 = __cpu_to_le16(flags1);
- cmd->data_tx.len = __cpu_to_le16(msdu->len -
- skb_cb->htt.frag_len -
- skb_cb->htt.pad_len);
- cmd->data_tx.id = __cpu_to_le16(msdu_id);
- cmd->data_tx.frags_paddr = __cpu_to_le32(skb_cb->paddr);
- cmd->data_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID);
-
- memcpy(cmd->data_tx.prefetch, hdr, prefetch_len);
+ skb_cb->htt.txbuf->cmd_hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM;
+ skb_cb->htt.txbuf->cmd_tx.flags0 = flags0;
+ skb_cb->htt.txbuf->cmd_tx.flags1 = __cpu_to_le16(flags1);
+ skb_cb->htt.txbuf->cmd_tx.len = __cpu_to_le16(msdu->len);
+ skb_cb->htt.txbuf->cmd_tx.id = __cpu_to_le16(msdu_id);
+ skb_cb->htt.txbuf->cmd_tx.frags_paddr = __cpu_to_le32(frags_paddr);
+ skb_cb->htt.txbuf->cmd_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID);
+
+ ath10k_dbg(ATH10K_DBG_HTT,
+ "htt tx flags0 %hhu flags1 %hu len %d id %hu frags_paddr %08x, msdu_paddr %08x vdev %hhu tid %hhu\n",
+ flags0, flags1, msdu->len, msdu_id, frags_paddr,
+ (u32)skb_cb->paddr, vdev_id, tid);
+ ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "htt tx msdu: ",
+ msdu->data, msdu->len);
- res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
+ sg_items[0].transfer_id = 0;
+ sg_items[0].transfer_context = NULL;
+ sg_items[0].vaddr = &skb_cb->htt.txbuf->htc_hdr;
+ sg_items[0].paddr = skb_cb->htt.txbuf_paddr +
+ sizeof(skb_cb->htt.txbuf->frags);
+ sg_items[0].len = sizeof(skb_cb->htt.txbuf->htc_hdr) +
+ sizeof(skb_cb->htt.txbuf->cmd_hdr) +
+ sizeof(skb_cb->htt.txbuf->cmd_tx);
+
+ sg_items[1].transfer_id = 0;
+ sg_items[1].transfer_context = NULL;
+ sg_items[1].vaddr = msdu->data;
+ sg_items[1].paddr = skb_cb->paddr;
+ sg_items[1].len = prefetch_len;
+
+ res = ath10k_hif_tx_sg(htt->ar,
+ htt->ar->htc.endpoint[htt->eid].ul_pipe_id,
+ sg_items, ARRAY_SIZE(sg_items));
if (res)
goto err_unmap_msdu;
@@ -514,10 +526,10 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
err_unmap_msdu:
dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE);
-err_pull_txfrag:
- skb_pull(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
-err_free_txdesc:
- dev_kfree_skb_any(txdesc);
+err_free_txbuf:
+ dma_pool_free(htt->tx_pool,
+ skb_cb->htt.txbuf,
+ skb_cb->htt.txbuf_paddr);
err_free_msdu_id:
spin_lock_bh(&htt->tx_lock);
htt->pending_tx[msdu_id] = NULL;