summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYangyang Li <liyangyang20@huawei.com>2021-12-09 17:06:55 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-12-29 14:28:39 +0300
commitc41b980703922dc0635202d7d6abe732286983cf (patch)
treebc69d19cdf90b92b33dd92be5966d587245d8e64
parent9b0ed41b25e20d1a660ec210d0d42572193af6fe (diff)
downloadlinux-c41b980703922dc0635202d7d6abe732286983cf.tar.xz
RDMA/hns: Fix RNR retransmission issue for HIP08
[ Upstream commit 4ad8181426df92976feee5fbc55236293d069b37 ] Due to the discrete nature of the HIP08 timer unit, a requester might finish the timeout period sooner, in elapsed real time, than its responder does, even when both sides share the identical RNR timeout length included in the RNR Nak packet and the responder indeed starts the timing prior to the requester. Furthermore, if a 'providential' resend packet arrived before the responder's timeout period expired, the responder is certainly entitled to drop the packet silently in the light of IB protocol. To address this problem, our team made good use of certain hardware facts: 1) The timing resolution regards the transmission arrangements is 1 microsecond, e.g. if cq_period field is set to 3, it would be interpreted as 3 microsecond by hardware 2) A QPC field shall inform the hardware how many timing unit (ticks) constitutes a full microsecond, which, by default, is 1000 3) It takes 14ns for the processor to handle a packet in the buffer, so the RNR timeout length of 10ns would ensure our processing mechanism is disabled during the entire timeout period and the packet won't be dropped silently To achieve (3), we permanently set the QPC field mentioned in (2) to zero which nominally indicates every time tick is equivalent to a microsecond in wall-clock time; now, a RNR timeout period at face value of 10 would only last 10 ticks, which is 10ns in wall-clock time. It's worth noting that we adapt the driver by magnifying certain configuration parameters(cq_period, eq_period and ack_timeout)by 1000 given the user assumes the configuring timing unit to be microseconds. Also, this particular improvisation is only deployed on HIP08 since other hardware has already solved this issue. Fixes: cfc85f3e4b7f ("RDMA/hns: Add profile support for hip08 driver") Link: https://lore.kernel.org/r/20211209140655.49493-1-liangwenpeng@huawei.com Signed-off-by: Yangyang Li <liyangyang20@huawei.com> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c64
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h8
2 files changed, 65 insertions, 7 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 51dd134952e7..96fe73ba689c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1604,11 +1604,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 clock_cycles_of_1us;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
false);
- hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ clock_cycles_of_1us = HNS_ROCE_1NS_CFG;
+ else
+ clock_cycles_of_1us = HNS_ROCE_1US_CFG;
+
+ hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us);
hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT);
return hns_roce_cmq_send(hr_dev, &desc, 1);
@@ -4812,6 +4818,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
return ret;
}
+static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
+{
+#define QP_ACK_TIMEOUT_MAX_HIP08 20
+#define QP_ACK_TIMEOUT_OFFSET 10
+#define QP_ACK_TIMEOUT_MAX 31
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "Local ACK timeout shall be 0 to 20.\n");
+ return false;
+ }
+ *timeout += QP_ACK_TIMEOUT_OFFSET;
+ } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "Local ACK timeout shall be 0 to 31.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4821,6 +4851,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
int ret = 0;
+ u8 timeout;
if (attr_mask & IB_QP_AV) {
ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
@@ -4830,12 +4861,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_TIMEOUT) {
- if (attr->timeout < 31) {
- hr_reg_write(context, QPC_AT, attr->timeout);
+ timeout = attr->timeout;
+ if (check_qp_timeout_cfg_range(hr_dev, &timeout)) {
+ hr_reg_write(context, QPC_AT, timeout);
hr_reg_clear(qpc_mask, QPC_AT);
- } else {
- ibdev_warn(&hr_dev->ib_dev,
- "Local ACK timeout shall be 0 to 30.\n");
}
}
@@ -4892,7 +4921,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
- hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer);
+ hr_reg_write(context, QPC_MIN_RNR_TIME,
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer);
hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME);
}
@@ -5509,6 +5540,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count);
hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev,
+ "cq_period(%u) reached the upper limit, adjusted to 65.\n",
+ cq_period);
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD;
+ }
+ cq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period);
hr_reg_clear(cqc_mask, CQC_CQ_PERIOD);
@@ -5904,6 +5945,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX);
hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n",
+ eq->eq_period);
+ eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD;
+ }
+ eq->eq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+
hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period);
hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER);
hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 4d904d5e82be..35c61da7ba15 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1444,6 +1444,14 @@ struct hns_roce_dip {
struct list_head node; /* all dips are on a list */
};
+/* only for RNR timeout issue of HIP08 */
+#define HNS_ROCE_CLOCK_ADJUST 1000
+#define HNS_ROCE_MAX_CQ_PERIOD 65
+#define HNS_ROCE_MAX_EQ_PERIOD 65
+#define HNS_ROCE_RNR_TIMER_10NS 1
+#define HNS_ROCE_1US_CFG 999
+#define HNS_ROCE_1NS_CFG 0
+
#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0