diff options
Diffstat (limited to 'drivers/infiniband/hw')
34 files changed, 2021 insertions, 594 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 2d5cbf4363e4..bdf3507810cb 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -476,7 +476,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, c2mr->umem->page_size, i, length, - c2mr->umem->offset, + ib_umem_offset(c2mr->umem), &kva, c2_convert_access(acc), c2mr); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c2fb71c182a8..9edc200b311d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -236,10 +236,12 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) static void set_emss(struct c4iw_ep *ep, u16 opt) { ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - - sizeof(struct iphdr) - sizeof(struct tcphdr); + ((AF_INET == ep->com.remote_addr.ss_family) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr)) - + sizeof(struct tcphdr); ep->mss = ep->emss; if (GET_TCPOPT_TSTAMP(opt)) - ep->emss -= 12; + ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4); if (ep->emss < 128) ep->emss = 128; if (ep->emss & 7) @@ -415,6 +417,7 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, return NULL; if (!our_interface(dev, n->dev) && !(n->dev->flags & IFF_LOOPBACK)) { + neigh_release(n); dst_release(&rt->dst); return NULL; } @@ -469,13 +472,13 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) skb = get_skb(skb, flowclen, GFP_KERNEL); flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); - flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) | - FW_FLOWC_WR_NPARAMS(8)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen, - 16)) | FW_WR_FLOWID(ep->hwtid)); + flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | + FW_FLOWC_WR_NPARAMS_V(8)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, + 16)) | FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; - flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN + flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V (ep->com.dev->rdev.lldi.pf)); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan); @@ -581,11 +584,14 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep, } static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts) + unsigned int *idx, int use_ts, int ipv6) { - unsigned short hdr_size = sizeof(struct iphdr) + + unsigned short hdr_size = (ipv6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + sizeof(struct tcphdr) + - (use_ts ? 12 : 0); + (use_ts ? + round_up(TCPOLEN_TIMESTAMP, 4) : 0); unsigned short data_size = mtu - hdr_size; cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); @@ -634,7 +640,8 @@ static int send_connect(struct c4iw_ep *ep) set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps); + enable_tcp_timestamps, + (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -642,32 +649,33 @@ static int send_connect(struct c4iw_ep *ep) * remainder will be specified in the rx_data_ack. */ win = ep->rcv_win >> 10; - if (win > RCV_BUFSIZ_MASK) - win = RCV_BUFSIZ_MASK; + if (win > RCV_BUFSIZ_M) + win = RCV_BUFSIZ_M; opt0 = (nocong ? NO_CONG(1) : 0) | - KEEP_ALIVE(1) | + KEEP_ALIVE_F | DELACK(1) | - WND_SCALE(wscale) | - MSS_IDX(mtu_idx) | - L2T_IDX(ep->l2t->idx) | - TX_CHAN(ep->tx_chan) | - SMAC_SEL(ep->smac_idx) | + WND_SCALE_V(wscale) | + MSS_IDX_V(mtu_idx) | + L2T_IDX_V(ep->l2t->idx) | + TX_CHAN_V(ep->tx_chan) | + SMAC_SEL_V(ep->smac_idx) | DSCP(ep->tos) | - ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(win); - opt2 = RX_CHANNEL(0) | + ULP_MODE_V(ULP_MODE_TCPDDP) | + RCV_BUFSIZ_V(win); + opt2 = RX_CHANNEL_V(0) | CCTRL_ECN(enable_ecn) | - RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); if (enable_tcp_timestamps) opt2 |= TSTAMPS_EN(1); if (enable_tcp_sack) opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) - opt2 |= WND_SCALE_EN(1); + opt2 |= WND_SCALE_EN_F; if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { - opt2 |= T5_OPT_2_VALID; + opt2 |= T5_OPT_2_VALID_F; opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ } t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); @@ -713,8 +721,6 @@ static int send_connect(struct c4iw_ep *ep) } else { u32 isn = (prandom_u32() & ~7UL) - 1; - opt2 |= T5_OPT_2_VALID; - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ if (peer2peer) isn += 4; @@ -730,7 +736,7 @@ static int send_connect(struct c4iw_ep *ep) t5_req->local_ip = la->sin_addr.s_addr; t5_req->peer_ip = ra->sin_addr.s_addr; t5_req->opt0 = cpu_to_be64(opt0); - t5_req->params = cpu_to_be64(V_FILTER_TUPLE( + t5_req->params = cpu_to_be64(FILTER_TUPLE_V( cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t))); @@ -756,10 +762,10 @@ static int send_connect(struct c4iw_ep *ep) t5_req6->peer_ip_lo = *((__be64 *) (ra6->sin6_addr.s6_addr + 8)); t5_req6->opt0 = cpu_to_be64(opt0); - t5_req6->params = (__force __be64)cpu_to_be32( + t5_req6->params = cpu_to_be64(FILTER_TUPLE_V( cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], - ep->l2t)); + ep->l2t))); t5_req6->rsvd = cpu_to_be32(isn); PDBG("%s snd_isn %u\n", __func__, be32_to_cpu(t5_req6->rsvd)); @@ -797,16 +803,16 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen); memset(req, 0, wrlen); req->op_to_immdlen = cpu_to_be32( - FW_WR_OP(FW_OFLD_TX_DATA_WR) | - FW_WR_COMPL(1) | - FW_WR_IMMDLEN(mpalen)); + FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL_F | + FW_WR_IMMDLEN_V(mpalen)); req->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(ep->hwtid) | - FW_WR_LEN16(wrlen >> 4)); + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(wrlen >> 4)); req->plen = cpu_to_be32(mpalen); req->tunnel_to_proxy = cpu_to_be32( - FW_OFLD_TX_DATA_WR_FLUSH(1) | - FW_OFLD_TX_DATA_WR_SHOVE(1)); + FW_OFLD_TX_DATA_WR_FLUSH_F | + FW_OFLD_TX_DATA_WR_SHOVE_F); mpa = (struct mpa_message *)(req + 1); memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); @@ -891,16 +897,16 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen); memset(req, 0, wrlen); req->op_to_immdlen = cpu_to_be32( - FW_WR_OP(FW_OFLD_TX_DATA_WR) | - FW_WR_COMPL(1) | - FW_WR_IMMDLEN(mpalen)); + FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL_F | + FW_WR_IMMDLEN_V(mpalen)); req->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(ep->hwtid) | - FW_WR_LEN16(wrlen >> 4)); + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(wrlen >> 4)); req->plen = cpu_to_be32(mpalen); req->tunnel_to_proxy = cpu_to_be32( - FW_OFLD_TX_DATA_WR_FLUSH(1) | - FW_OFLD_TX_DATA_WR_SHOVE(1)); + FW_OFLD_TX_DATA_WR_FLUSH_F | + FW_OFLD_TX_DATA_WR_SHOVE_F); mpa = (struct mpa_message *)(req + 1); memset(mpa, 0, sizeof(*mpa)); @@ -971,16 +977,16 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen); memset(req, 0, wrlen); req->op_to_immdlen = cpu_to_be32( - FW_WR_OP(FW_OFLD_TX_DATA_WR) | - FW_WR_COMPL(1) | - FW_WR_IMMDLEN(mpalen)); + FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL_F | + FW_WR_IMMDLEN_V(mpalen)); req->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(ep->hwtid) | - FW_WR_LEN16(wrlen >> 4)); + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(wrlen >> 4)); req->plen = cpu_to_be32(mpalen); req->tunnel_to_proxy = cpu_to_be32( - FW_OFLD_TX_DATA_WR_FLUSH(1) | - FW_OFLD_TX_DATA_WR_SHOVE(1)); + FW_OFLD_TX_DATA_WR_FLUSH_F | + FW_OFLD_TX_DATA_WR_SHOVE_F); mpa = (struct mpa_message *)(req + 1); memset(mpa, 0, sizeof(*mpa)); @@ -1243,15 +1249,15 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) * due to the limit in the number of bits in the RCV_BUFSIZ field, * then add the overage in to the credits returned. */ - if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024) - credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024; + if (ep->rcv_win > RCV_BUFSIZ_M * 1024) + credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); memset(req, 0, wrlen); INIT_TP_WR(req, ep->hwtid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); - req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) | + req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F | F_RX_DACK_CHANGE | V_RX_DACK_MODE(dack_mode)); set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); @@ -1634,7 +1640,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) __state_set(&ep->com, MPA_REQ_RCVD); /* drive upcall */ - mutex_lock(&ep->parent_ep->com.mutex); + mutex_lock_nested(&ep->parent_ep->com.mutex, + SINGLE_DEPTH_NESTING); if (ep->parent_ep->com.state != DEAD) { if (connect_request_upcall(ep)) abort_connection(ep, skb, GFP_KERNEL); @@ -1745,7 +1752,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); - req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); req->le.filter = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); @@ -1756,14 +1763,15 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) req->le.pport = sin->sin_port; req->le.u.ipv4.pip = sin->sin_addr.s_addr; req->tcb.t_state_to_astid = - htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | - V_FW_OFLD_CONNECTION_WR_ASTID(atid)); + htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) | + FW_OFLD_CONNECTION_WR_ASTID_V(atid)); req->tcb.cplrxdataack_cplpassacceptrpl = - htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); + htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps); + enable_tcp_timestamps, + (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -1771,34 +1779,34 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) * remainder will be specified in the rx_data_ack. */ win = ep->rcv_win >> 10; - if (win > RCV_BUFSIZ_MASK) - win = RCV_BUFSIZ_MASK; + if (win > RCV_BUFSIZ_M) + win = RCV_BUFSIZ_M; req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | (nocong ? NO_CONG(1) : 0) | - KEEP_ALIVE(1) | + KEEP_ALIVE_F | DELACK(1) | - WND_SCALE(wscale) | - MSS_IDX(mtu_idx) | - L2T_IDX(ep->l2t->idx) | - TX_CHAN(ep->tx_chan) | - SMAC_SEL(ep->smac_idx) | + WND_SCALE_V(wscale) | + MSS_IDX_V(mtu_idx) | + L2T_IDX_V(ep->l2t->idx) | + TX_CHAN_V(ep->tx_chan) | + SMAC_SEL_V(ep->smac_idx) | DSCP(ep->tos) | - ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(win)); + ULP_MODE_V(ULP_MODE_TCPDDP) | + RCV_BUFSIZ_V(win)); req->tcb.opt2 = (__force __be32) (PACE(1) | TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | - RX_CHANNEL(0) | + RX_CHANNEL_V(0) | CCTRL_ECN(enable_ecn) | - RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid)); + RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid)); if (enable_tcp_timestamps) - req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1); + req->tcb.opt2 |= (__force __be32)TSTAMPS_EN(1); if (enable_tcp_sack) - req->tcb.opt2 |= (__force __be32) SACK_EN(1); + req->tcb.opt2 |= (__force __be32)SACK_EN(1); if (wscale && enable_tcp_window_scaling) - req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1); - req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0); - req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2); + req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F; + req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2); set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); set_bit(ACT_OFLD_CONN, &ep->com.history); c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); @@ -2162,7 +2170,8 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, ep->hwtid)); best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp); + enable_tcp_timestamps && req->tcpopt.tstamp, + (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -2170,28 +2179,28 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, * remainder will be specified in the rx_data_ack. */ win = ep->rcv_win >> 10; - if (win > RCV_BUFSIZ_MASK) - win = RCV_BUFSIZ_MASK; + if (win > RCV_BUFSIZ_M) + win = RCV_BUFSIZ_M; opt0 = (nocong ? NO_CONG(1) : 0) | - KEEP_ALIVE(1) | + KEEP_ALIVE_F | DELACK(1) | - WND_SCALE(wscale) | - MSS_IDX(mtu_idx) | - L2T_IDX(ep->l2t->idx) | - TX_CHAN(ep->tx_chan) | - SMAC_SEL(ep->smac_idx) | + WND_SCALE_V(wscale) | + MSS_IDX_V(mtu_idx) | + L2T_IDX_V(ep->l2t->idx) | + TX_CHAN_V(ep->tx_chan) | + SMAC_SEL_V(ep->smac_idx) | DSCP(ep->tos >> 2) | - ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(win); - opt2 = RX_CHANNEL(0) | - RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + ULP_MODE_V(ULP_MODE_TCPDDP) | + RCV_BUFSIZ_V(win); + opt2 = RX_CHANNEL_V(0) | + RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); if (enable_tcp_timestamps && req->tcpopt.tstamp) opt2 |= TSTAMPS_EN(1); if (enable_tcp_sack && req->tcpopt.sack) opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) - opt2 |= WND_SCALE_EN(1); + opt2 |= WND_SCALE_EN_F; if (enable_ecn) { const struct tcphdr *tcph; u32 hlen = ntohl(req->hdr_len); @@ -2203,7 +2212,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, } if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { u32 isn = (prandom_u32() & ~7UL) - 1; - opt2 |= T5_OPT_2_VALID; + opt2 |= T5_OPT_2_VALID_F; opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ rpl5 = (void *)rpl; @@ -3118,6 +3127,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, __func__); + else if (err > 0) + err = net_xmit_errno(err); if (err) pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", err, ep->stid, @@ -3151,6 +3162,8 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, __func__); + else if (err > 0) + err = net_xmit_errno(err); } if (err) pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n" @@ -3529,9 +3542,9 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); memset(req, 0, sizeof(*req)); - req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); - req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); - req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F); + req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F); req->le.filter = (__force __be32) filter; req->le.lport = lport; req->le.pport = rport; @@ -3540,16 +3553,16 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->tcb.rcv_nxt = htonl(rcv_isn + 1); req->tcb.rcv_adv = htons(window); req->tcb.t_state_to_astid = - htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) | - V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) | - V_FW_OFLD_CONNECTION_WR_ASTID( + htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) | + FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) | + FW_OFLD_CONNECTION_WR_ASTID_V( GET_PASS_OPEN_TID(ntohl(cpl->tos_stid)))); /* * We store the qid in opt2 which will be used by the firmware * to send us the wr response. */ - req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid)); + req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid)); /* * We initialize the MSS index in TCB to 0xF. @@ -3557,7 +3570,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TCB picks up the correct value. If this was 0 * TP will ignore any value > 0 for MSS index. */ - req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); + req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF)); req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 0f773e78e080..e9fd3a029296 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -51,9 +51,9 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( - FW_WR_OP(FW_RI_RES_WR) | + FW_WR_OP_V(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(1) | - FW_WR_COMPL(1)); + FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; @@ -121,9 +121,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( - FW_WR_OP(FW_RI_RES_WR) | + FW_WR_OP_V(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(1) | - FW_WR_COMPL(1)); + FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index f25df5276c22..eb5df4e62703 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -60,7 +60,7 @@ int c4iw_wr_log = 0; module_param(c4iw_wr_log, int, 0444); MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data."); -int c4iw_wr_log_size_order = 12; +static int c4iw_wr_log_size_order = 12; module_param(c4iw_wr_log_size_order, int, 0444); MODULE_PARM_DESC(c4iw_wr_log_size_order, "Number of entries (log2) in the work request timing log."); @@ -670,7 +670,7 @@ static int ep_open(struct inode *inode, struct file *file) idr_for_each(&epd->devp->stid_idr, count_idrs, &count); spin_unlock_irq(&epd->devp->lock); - epd->bufsize = count * 160; + epd->bufsize = count * 240; epd->buf = vmalloc(epd->bufsize); if (!epd->buf) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index ec7a2988a703..cb43c2299ac0 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -50,6 +50,13 @@ static int inline_threshold = C4IW_INLINE_THRESHOLD; module_param(inline_threshold, int, 0644); MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)"); +static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length) +{ + return (is_t4(dev->rdev.lldi.adapter_type) || + is_t5(dev->rdev.lldi.adapter_type)) && + length >= 8*1024*1024*1024ULL; +} + static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, u32 len, dma_addr_t data, int wait) { @@ -74,18 +81,18 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, req = (struct ulp_mem_io *)__skb_put(skb, wr_len); memset(req, 0, wr_len); INIT_ULPTX_WR(req, wr_len, 0, 0); - req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | - (wait ? FW_WR_COMPL(1) : 0)); + req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | + (wait ? FW_WR_COMPL_F : 0)); req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; - req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); - req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); + req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE)); req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1)); - req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(len>>5)); + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5)); req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); - req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr)); sgl = (struct ulptx_sgl *)(req + 1); - sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_DSGL) | + sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_DSGL) | ULPTX_NSGE(1)); sgl->len0 = cpu_to_be32(len); sgl->addr0 = cpu_to_be64(data); @@ -107,12 +114,12 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, u8 wr_len, *to_dp, *from_dp; int copy_len, num_wqe, i, ret = 0; struct c4iw_wr_wait wr_wait; - __be32 cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + __be32 cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE)); if (is_t4(rdev->lldi.adapter_type)) - cmd |= cpu_to_be32(ULP_MEMIO_ORDER(1)); + cmd |= cpu_to_be32(ULP_MEMIO_ORDER_F); else - cmd |= cpu_to_be32(V_T5_ULP_MEMIO_IMM(1)); + cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F); addr &= 0x7FFFFFF; PDBG("%s addr 0x%x len %u\n", __func__, addr, len); @@ -135,23 +142,23 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, INIT_ULPTX_WR(req, wr_len, 0, 0); if (i == (num_wqe-1)) { - req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | - FW_WR_COMPL(1)); + req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | + FW_WR_COMPL_F); req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; } else - req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR)); + req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( - FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); + FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); req->cmd = cmd; - req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN( + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V( DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO))); req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); - req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr + i * 3)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr + i * 3)); sc = (struct ulptx_idata *)(req + 1); - sc->cmd_more = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_IMM)); + sc->cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM)); sc->len = cpu_to_be32(roundup(copy_len, T4_ULPTX_MIN_IO)); to_dp = (u8 *)(sc + 1); @@ -369,9 +376,11 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, int ret; ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, - FW_RI_STAG_NSMR, mhp->attr.perms, + FW_RI_STAG_NSMR, mhp->attr.len ? + mhp->attr.perms : 0, mhp->attr.mw_bind_enable, mhp->attr.zbva, - mhp->attr.va_fbo, mhp->attr.len, shift - 12, + mhp->attr.va_fbo, mhp->attr.len ? + mhp->attr.len : -1, shift - 12, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (ret) return ret; @@ -536,6 +545,11 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask, return ret; } + if (mr_exceeds_hw_limits(rhp, total_size)) { + kfree(page_list); + return -EINVAL; + } + ret = reregister_mem(rhp, php, &mh, shift, npages); kfree(page_list); if (ret) @@ -596,6 +610,12 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, if (ret) goto err; + if (mr_exceeds_hw_limits(rhp, total_size)) { + kfree(page_list); + ret = -EINVAL; + goto err; + } + ret = alloc_pbl(mhp, npages); if (ret) { kfree(page_list); @@ -699,6 +719,10 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, php = to_c4iw_pd(pd); rhp = php->rhp; + + if (mr_exceeds_hw_limits(rhp, length)) + return ERR_PTR(-EINVAL); + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 72e3b69d1b76..66bd6a2ad83b 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -408,10 +408,10 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, PDBG("%s dev 0x%p\n", __func__, dev); return sprintf(buf, "%u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_GET(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MINOR_GET(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MICRO_GET(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_BUILD_GET(c4iw_dev->rdev.lldi.fw_vers)); + FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } static ssize_t show_hca(struct device *dev, struct device_attribute *attr, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 41cd6882b648..bb85d479e66e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -271,9 +271,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( - FW_WR_OP(FW_RI_RES_WR) | + FW_WR_OP_V(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(2) | - FW_WR_COMPL(1)); + FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; @@ -1082,10 +1082,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof *wqe); - wqe->op_compl = cpu_to_be32(FW_WR_OP(FW_RI_INIT_WR)); + wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR)); wqe->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(qhp->ep->hwtid) | - FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + FW_WR_FLOWID_V(qhp->ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); @@ -1204,11 +1204,11 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32( - FW_WR_OP(FW_RI_INIT_WR) | - FW_WR_COMPL(1)); + FW_WR_OP_V(FW_RI_INIT_WR) | + FW_WR_COMPL_F); wqe->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(ep->hwtid) | - FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); wqe->cookie = (unsigned long) &ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; @@ -1273,11 +1273,11 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32( - FW_WR_OP(FW_RI_INIT_WR) | - FW_WR_COMPL(1)); + FW_WR_OP_V(FW_RI_INIT_WR) | + FW_WR_COMPL_F); wqe->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(qhp->ep->hwtid) | - FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + FW_WR_FLOWID_V(qhp->ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; @@ -1538,9 +1538,9 @@ err: set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; abort = 1; - wake_up(&qhp->wait); BUG_ON(!ep); flush_qp(qhp); + wake_up(&qhp->wait); out: mutex_unlock(&qhp->mutex); diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 3488e8c9fcb4..f914b30999f8 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -399,7 +399,7 @@ reg_user_mr_fallback: pginfo.num_kpages = num_kpages; pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; - pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; + pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size; pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index e0c404bdc4a8..4977082e081f 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -82,7 +82,6 @@ static int create_file(const char *name, umode_t mode, { int error; - *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 5e61e9bff697..c7278f6a8217 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -214,7 +214,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.user_base = start; mr->mr.iova = virt_addr; mr->mr.length = length; - mr->mr.offset = umem->offset; + mr->mr.offset = ib_umem_offset(umem); mr->mr.access_flags = mr_access_flags; mr->mr.max_segs = n; mr->umem = umem; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 1066eec854a9..a3b70f6c4035 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -233,7 +233,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector if (err) goto err_dbmap; - cq->mcq.comp = mlx4_ib_cq_comp; + if (context) + cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp; + else + cq->mcq.comp = mlx4_ib_cq_comp; cq->mcq.event = mlx4_ib_cq_event; if (context) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bda5994ceb68..57ecc5b204f3 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1173,18 +1173,24 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], &mflow->reg_id[i]); if (err) - goto err_free; + goto err_create_flow; i++; } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]); if (err) - goto err_free; + goto err_create_flow; + i++; } return &mflow->ibflow; +err_create_flow: + while (i) { + (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]); + i--; + } err_free: kfree(mflow); return ERR_PTR(err); @@ -1969,8 +1975,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) dev->caps.num_ports > dev->caps.comp_pool) return; - eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ - dev->caps.num_ports); + eq_per_port = dev->caps.comp_pool / dev->caps.num_ports; /* Init eq table */ added_eqs = 0; @@ -2222,7 +2227,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, - &ibdev->steer_qpn_base); + &ibdev->steer_qpn_base, 0); if (err) goto err_counter; diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 8f9325cfc85d..c36ccbd9a644 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -223,7 +223,6 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, if (flags & IB_MR_REREG_TRANS) { int shift; - int err; int n; mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 9c5150c3cb31..cf000b7ad64f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -802,16 +802,21 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } } } else { - /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE - * BlueFlame setup flow wrongly causes VLAN insertion. */ + /* Raw packet QPNs may not have bits 6,7 set in their qp_num; + * otherwise, the WQE BlueFlame setup flow wrongly causes + * VLAN insertion. */ if (init_attr->qp_type == IB_QPT_RAW_PACKET) - err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn); + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, + (init_attr->cap.max_send_wr ? + MLX4_RESERVE_ETH_BF_QP : 0) | + (init_attr->cap.max_recv_wr ? + MLX4_RESERVE_A0_QP : 0)); else if (qp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); else err = mlx4_qp_reserve_range(dev->dev, 1, 1, - &qpn); + &qpn, 0); if (err) goto err_proxy; } diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 4ea0135af484..27a70159e2ea 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o +mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index e4056279166d..c463e7bba5f4 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -752,7 +752,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, return ERR_PTR(-EINVAL); entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.max_cqes) + if (entries > dev->mdev->caps.gen.max_cqes) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -805,14 +805,14 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, } - mlx5_vfree(cqb); + kvfree(cqb); return &cq->ibcq; err_cmd: mlx5_core_destroy_cq(dev->mdev, &cq->mcq); err_cqb: - mlx5_vfree(cqb); + kvfree(cqb); if (context) destroy_cq_user(cq, context); else @@ -919,7 +919,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; u32 fsel; - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) return -ENOSYS; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -1074,7 +1074,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int uninitialized_var(cqe_size); unsigned long flags; - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { pr_info("Firmware does not support resize CQ\n"); return -ENOSYS; } @@ -1083,7 +1083,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -EINVAL; entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.max_cqes + 1) + if (entries > dev->mdev->caps.gen.max_cqes + 1) return -EINVAL; if (entries == ibcq->cqe + 1) @@ -1159,11 +1159,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) } mutex_unlock(&cq->resize_mutex); - mlx5_vfree(in); + kvfree(in); return 0; ex_alloc: - mlx5_vfree(in); + kvfree(in); ex_resize: if (udata) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index b514bbb5610f..657af9a1167c 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->caps.ext_port_cap[port - 1] = (!err && !packet_error) ? + dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d8907b20522a..8a87404e9c76 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -157,11 +157,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + struct mlx5_general_caps *gen; int err = -ENOMEM; int max_rq_sg; int max_sq_sg; u64 flags; + gen = &dev->mdev->caps.gen; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -183,7 +185,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; - flags = dev->mdev->caps.flags; + flags = gen->flags; if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -213,34 +215,41 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; - props->page_size_cap = dev->mdev->caps.min_page_sz; - props->max_qp = 1 << dev->mdev->caps.log_max_qp; - props->max_qp_wr = dev->mdev->caps.max_wqes; - max_rq_sg = dev->mdev->caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (dev->mdev->caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / + props->page_size_cap = gen->min_page_sz; + props->max_qp = 1 << gen->log_max_qp; + props->max_qp_wr = gen->max_wqes; + max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_cq = 1 << dev->mdev->caps.log_max_cq; - props->max_cqe = dev->mdev->caps.max_cqes - 1; - props->max_mr = 1 << dev->mdev->caps.log_max_mkey; - props->max_pd = 1 << dev->mdev->caps.log_max_pd; - props->max_qp_rd_atom = dev->mdev->caps.max_ra_req_qp; - props->max_qp_init_rd_atom = dev->mdev->caps.max_ra_res_qp; + props->max_cq = 1 << gen->log_max_cq; + props->max_cqe = gen->max_cqes - 1; + props->max_mr = 1 << gen->log_max_mkey; + props->max_pd = 1 << gen->log_max_pd; + props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp; + props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp; + props->max_srq = 1 << gen->log_max_srq; + props->max_srq_wr = gen->max_srq_wqes - 1; + props->local_ca_ack_delay = gen->local_ca_ack_delay; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = 1 << dev->mdev->caps.log_max_srq; - props->max_srq_wr = dev->mdev->caps.max_srq_wqes - 1; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->local_ca_ack_delay = dev->mdev->caps.local_ca_ack_delay; + props->local_ca_ack_delay = gen->local_ca_ack_delay; props->atomic_cap = IB_ATOMIC_NONE; props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); - props->max_mcast_grp = 1 << dev->mdev->caps.log_max_mcg; - props->max_mcast_qp_attach = dev->mdev->caps.max_qp_mcg; + props->max_mcast_grp = 1 << gen->log_max_mcg; + props->max_mcast_qp_attach = gen->max_qp_mcg; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; + props->odp_caps = dev->odp_caps; +#endif + out: kfree(in_mad); kfree(out_mad); @@ -254,10 +263,12 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + struct mlx5_general_caps *gen; int ext_active_speed; int err = -ENOMEM; - if (port < 1 || port > dev->mdev->caps.num_ports) { + gen = &dev->mdev->caps.gen; + if (port < 1 || port > gen->num_ports) { mlx5_ib_warn(dev, "invalid port number %d\n", port); return -EINVAL; } @@ -288,8 +299,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; - props->max_msg_sz = 1 << to_mdev(ibdev)->mdev->caps.log_max_msg; - props->pkey_tbl_len = to_mdev(ibdev)->mdev->caps.port[port - 1].pkey_table_len; + props->max_msg_sz = 1 << gen->log_max_msg; + props->pkey_tbl_len = gen->port[port - 1].pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -316,7 +327,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (dev->mdev->caps.ext_port_cap[port - 1] & + if (gen->ext_port_cap[port - 1] & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; @@ -470,6 +481,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; + struct mlx5_general_caps *gen; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; int gross_uuars; @@ -480,6 +492,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int i; size_t reqlen; + gen = &dev->mdev->caps.gen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); @@ -512,14 +525,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; - resp.qp_tab_size = 1 << dev->mdev->caps.log_max_qp; - resp.bf_reg_size = dev->mdev->caps.bf_reg_size; + resp.qp_tab_size = 1 << gen->log_max_qp; + resp.bf_reg_size = gen->bf_reg_size; resp.cache_line_size = L1_CACHE_BYTES; - resp.max_sq_desc_sz = dev->mdev->caps.max_sq_desc_sz; - resp.max_rq_desc_sz = dev->mdev->caps.max_rq_desc_sz; - resp.max_send_wqebb = dev->mdev->caps.max_wqes; - resp.max_recv_wr = dev->mdev->caps.max_wqes; - resp.max_srq_recv_wr = dev->mdev->caps.max_srq_wqes; + resp.max_sq_desc_sz = gen->max_sq_desc_sz; + resp.max_rq_desc_sz = gen->max_rq_desc_sz; + resp.max_send_wqebb = gen->max_wqes; + resp.max_recv_wr = gen->max_wqes; + resp.max_srq_recv_wr = gen->max_srq_wqes; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -561,11 +574,15 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_count; } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; +#endif + INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; - resp.num_ports = dev->mdev->caps.num_ports; + resp.num_ports = gen->num_ports; err = ib_copy_to_udata(udata, &resp, sizeof(resp) - sizeof(resp.reserved)); if (err) @@ -650,13 +667,13 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return -EINVAL; idx = get_index(vma->vm_pgoff); + if (idx >= uuari->num_uars) + return -EINVAL; + pfn = uar_index2pfn(dev, uuari->uars[idx].index); mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, (unsigned long long)pfn); - if (idx >= uuari->num_uars) - return -EINVAL; - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) @@ -851,7 +868,7 @@ static ssize_t show_reg_pages(struct device *device, struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages); + return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static ssize_t show_hca(struct device *device, struct device_attribute *attr, @@ -967,9 +984,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, static void get_ext_port_caps(struct mlx5_ib_dev *dev) { + struct mlx5_general_caps *gen; int port; - for (port = 1; port <= dev->mdev->caps.num_ports; port++) + gen = &dev->mdev->caps.gen; + for (port = 1; port <= gen->num_ports; port++) mlx5_query_ext_port_caps(dev, port); } @@ -977,9 +996,11 @@ static int get_port_caps(struct mlx5_ib_dev *dev) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; + struct mlx5_general_caps *gen; int err = 0; int port; + gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; @@ -994,14 +1015,14 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= dev->mdev->caps.num_ports; port++) { + for (port = 1; port <= gen->num_ports; port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); break; } - dev->mdev->caps.port[port - 1].pkey_table_len = dprops->max_pkeys; - dev->mdev->caps.port[port - 1].gid_table_len = pprops->gid_tbl_len; + gen->port[port - 1].pkey_table_len = dprops->max_pkeys; + gen->port[port - 1].gid_table_len = pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } @@ -1279,8 +1300,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey; - dev->num_ports = mdev->caps.num_ports; + dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey; + dev->num_ports = mdev->caps.gen.num_ports; dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = dev->num_comp_vectors; dev->ib_dev.dma_device = &mdev->pdev->dev; @@ -1310,6 +1331,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); + dev->ib_dev.uverbs_ex_cmd_mask = + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; @@ -1355,7 +1378,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { + mlx5_ib_internal_query_odp_caps(dev); + + if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= @@ -1368,16 +1393,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_eqs; mutex_init(&dev->cap_mask_mutex); - spin_lock_init(&dev->mr_lock); err = create_dev_resources(&dev->devr); if (err) goto err_eqs; - err = ib_register_device(&dev->ib_dev, NULL); + err = mlx5_ib_odp_init_one(dev); if (err) goto err_rsrc; + err = ib_register_device(&dev->ib_dev, NULL); + if (err) + goto err_odp; + err = create_umr_res(dev); if (err) goto err_dev; @@ -1399,6 +1427,9 @@ err_umrc: err_dev: ib_unregister_device(&dev->ib_dev); +err_odp: + mlx5_ib_odp_remove_one(dev); + err_rsrc: destroy_dev_resources(&dev->devr); @@ -1414,8 +1445,10 @@ err_dealloc: static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { struct mlx5_ib_dev *dev = context; - destroy_umrc_res(dev); + ib_unregister_device(&dev->ib_dev); + destroy_umrc_res(dev); + mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); free_comp_eqs(dev); ib_dealloc_device(&dev->ib_dev); @@ -1429,15 +1462,30 @@ static struct mlx5_interface mlx5_ib_interface = { static int __init mlx5_ib_init(void) { + int err; + if (deprecated_prof_sel != 2) pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n"); - return mlx5_register_interface(&mlx5_ib_interface); + err = mlx5_ib_odp_init(); + if (err) + return err; + + err = mlx5_register_interface(&mlx5_ib_interface); + if (err) + goto clean_odp; + + return err; + +clean_odp: + mlx5_ib_odp_cleanup(); + return err; } static void __exit mlx5_ib_cleanup(void) { mlx5_unregister_interface(&mlx5_ib_interface); + mlx5_ib_odp_cleanup(); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index a3e81444c825..b56e4c5593ee 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <rdma/ib_umem.h> +#include <rdma/ib_umem_odp.h> #include "mlx5_ib.h" /* @umem: umem object to scan @@ -55,16 +56,28 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, u64 pfn; struct scatterlist *sg; int entry; + unsigned long page_shift = ilog2(umem->page_size); - addr = addr >> PAGE_SHIFT; + /* With ODP we must always match OS page size. */ + if (umem->odp_data) { + *count = ib_umem_page_count(umem); + *shift = PAGE_SHIFT; + *ncont = *count; + if (order) + *order = ilog2(roundup_pow_of_two(*count)); + + return; + } + + addr = addr >> page_shift; tmp = (unsigned long)addr; m = find_first_bit(&tmp, sizeof(tmp)); skip = 1 << m; mask = skip - 1; i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; + len = sg_dma_len(sg) >> page_shift; + pfn = sg_dma_address(sg) >> page_shift; for (k = 0; k < len; k++) { if (!(i & mask)) { tmp = (unsigned long)pfn; @@ -103,14 +116,43 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, *ncont = 0; } - *shift = PAGE_SHIFT + m; + *shift = page_shift + m; *count = i; } -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int umr) +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static u64 umem_dma_to_mtt(dma_addr_t umem_dma) { - int shift = page_shift - PAGE_SHIFT; + u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; + + if (umem_dma & ODP_READ_ALLOWED_BIT) + mtt_entry |= MLX5_IB_MTT_READ; + if (umem_dma & ODP_WRITE_ALLOWED_BIT) + mtt_entry |= MLX5_IB_MTT_WRITE; + + return mtt_entry; +} +#endif + +/* + * Populate the given array with bus addresses from the umem. + * + * dev - mlx5_ib device + * umem - umem to use to fill the pages + * page_shift - determines the page size used in the resulting array + * offset - offset into the umem to start from, + * only implemented for ODP umems + * num_pages - total number of pages to fill + * pas - bus addresses array to fill + * access_flags - access flags to set on all present pages. + use enum mlx5_ib_mtt_access_flags for this. + */ +void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, size_t offset, size_t num_pages, + __be64 *pas, int access_flags) +{ + unsigned long umem_page_shift = ilog2(umem->page_size); + int shift = page_shift - umem_page_shift; int mask = (1 << shift) - 1; int i, k; u64 cur = 0; @@ -118,28 +160,49 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int len; struct scatterlist *sg; int entry; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + const bool odp = umem->odp_data != NULL; + + if (odp) { + WARN_ON(shift != 0); + WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); + + for (i = 0; i < num_pages; ++i) { + dma_addr_t pa = umem->odp_data->dma_list[offset + i]; + + pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); + } + return; + } +#endif i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; + len = sg_dma_len(sg) >> umem_page_shift; base = sg_dma_address(sg); for (k = 0; k < len; k++) { if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - if (umr) - cur |= 3; + cur = base + (k << umem_page_shift); + cur |= access_flags; pas[i >> shift] = cpu_to_be64(cur); mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", i >> shift, be64_to_cpu(pas[i >> shift])); } else mlx5_ib_dbg(dev, "=====> 0x%llx\n", - base + (k << PAGE_SHIFT)); + base + (k << umem_page_shift)); i++; } } } +void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, __be64 *pas, int access_flags) +{ + return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, + ib_umem_num_pages(umem), pas, + access_flags); +} int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) { u64 page_size; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 386780f0d1e1..83f22fe297c8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -111,6 +111,8 @@ struct mlx5_ib_pd { */ #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START +#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) +#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 @@ -147,6 +149,29 @@ enum { MLX5_QP_EMPTY }; +/* + * Connect-IB can trigger up to four concurrent pagefaults + * per-QP. + */ +enum mlx5_ib_pagefault_context { + MLX5_IB_PAGEFAULT_RESPONDER_READ, + MLX5_IB_PAGEFAULT_REQUESTOR_READ, + MLX5_IB_PAGEFAULT_RESPONDER_WRITE, + MLX5_IB_PAGEFAULT_REQUESTOR_WRITE, + MLX5_IB_PAGEFAULT_CONTEXTS +}; + +static inline enum mlx5_ib_pagefault_context + mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault) +{ + return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE); +} + +struct mlx5_ib_pfault { + struct work_struct work; + struct mlx5_pagefault mpfault; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; struct mlx5_core_qp mqp; @@ -192,6 +217,21 @@ struct mlx5_ib_qp { /* Store signature errors */ bool signature_en; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + /* + * A flag that is true for QP's that are in a state that doesn't + * allow page faults, and shouldn't schedule any more faults. + */ + int disable_page_faults; + /* + * The disable_page_faults_lock protects a QP's disable_page_faults + * field, allowing for a thread to atomically check whether the QP + * allows page faults, and if so schedule a page fault. + */ + spinlock_t disable_page_faults_lock; + struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; +#endif }; struct mlx5_ib_cq_buf { @@ -206,6 +246,19 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, }; +struct mlx5_umr_wr { + union { + u64 virt_addr; + u64 offset; + } target; + struct ib_pd *pd; + unsigned int page_shift; + unsigned int npages; + u32 length; + int access_flags; + u32 mkey; +}; + struct mlx5_shared_mr_info { int mr_id; struct ib_umem *umem; @@ -253,6 +306,13 @@ struct mlx5_ib_xrcd { u32 xrcdn; }; +enum mlx5_ib_mtt_access_flags { + MLX5_IB_MTT_READ = (1 << 0), + MLX5_IB_MTT_WRITE = (1 << 1), +}; + +#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) + struct mlx5_ib_mr { struct ib_mr ibmr; struct mlx5_core_mr mmr; @@ -261,12 +321,11 @@ struct mlx5_ib_mr { struct list_head list; int order; int umred; - __be64 *pas; - dma_addr_t dma; int npages; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; struct mlx5_core_sig_ctx *sig; + int live; }; struct mlx5_ib_fast_reg_page_list { @@ -372,11 +431,18 @@ struct mlx5_ib_dev { struct umr_common umrc; /* sync used page count stats */ - spinlock_t mr_lock; struct mlx5_ib_resources devr; struct mlx5_mr_cache cache; struct timer_list delay_timer; int fill_delay; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct ib_odp_caps odp_caps; + /* + * Sleepable RCU that prevents destruction of MRs while they are still + * being used by a page fault handler. + */ + struct srcu_struct mr_srcu; +#endif }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -490,6 +556,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); +int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, + void *buffer, u32 length); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *context, struct ib_udata *udata); @@ -502,6 +570,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, + int npages, int zap); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); int mlx5_ib_destroy_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, @@ -533,8 +603,11 @@ int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, int *ncont, int *order); +void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, size_t offset, size_t num_pages, + __be64 *pas, int access_flags); void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int umr); + int page_shift, __be64 *pas, int access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); @@ -544,6 +617,38 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +extern struct workqueue_struct *mlx5_ib_page_fault_wq; + +int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev); +void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault); +void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp); +int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); +void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev); +int __init mlx5_ib_odp_init(void); +void mlx5_ib_odp_cleanup(void); +void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp); +void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp); +void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, + unsigned long end); + +#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +{ + return 0; +} + +static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {} +static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } +static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} +static inline int mlx5_ib_odp_init(void) { return 0; } +static inline void mlx5_ib_odp_cleanup(void) {} +static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {} +static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} + +#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -561,4 +666,7 @@ static inline u8 convert_access(int acc) MLX5_PERM_LOCAL_READ; } +#define MLX5_MAX_UMR_SHIFT 16 +#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) + #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 80b3c63eab5d..32a28bd50b20 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -37,21 +37,34 @@ #include <linux/export.h> #include <linux/delay.h> #include <rdma/ib_umem.h> +#include <rdma/ib_umem_odp.h> +#include <rdma/ib_verbs.h> #include "mlx5_ib.h" enum { MAX_PENDING_REG_MR = 8, }; -enum { - MLX5_UMR_ALIGN = 2048 -}; +#define MLX5_UMR_ALIGN 2048 +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static __be64 mlx5_ib_update_mtt_emergency_buffer[ + MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)] + __aligned(MLX5_UMR_ALIGN); +static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex); +#endif + +static int clean_mr(struct mlx5_ib_mr *mr); -static __be64 *mr_align(__be64 *ptr, int align) +static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - unsigned long mask = align - 1; + int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); - return (__be64 *)(((unsigned long)ptr + mask) & ~mask); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + /* Wait until all page fault handlers using the mr complete. */ + synchronize_srcu(&dev->mr_srcu); +#endif + + return err; } static int order2idx(struct mlx5_ib_dev *dev, int order) @@ -146,7 +159,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) mr->order = ent->order; mr->umred = 1; mr->dev = dev; - in->seg.status = 1 << 6; + in->seg.status = MLX5_MKEY_STATUS_FREE; in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; @@ -159,6 +172,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { + spin_lock_irq(&ent->lock); + ent->pending--; + spin_unlock_irq(&ent->lock); mlx5_ib_warn(dev, "create mkey failed %d\n", err); kfree(mr); break; @@ -188,7 +204,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + err = destroy_mkey(dev, mr); if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); else @@ -479,7 +495,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + err = destroy_mkey(dev, mr); if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); else @@ -665,7 +681,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size) static int use_umr(int order) { - return order <= 17; + return order <= MLX5_MAX_UMR_SHIFT; } static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, @@ -675,6 +691,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_mr *mr = dev->umrc.mr; + struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; sg->addr = dma; sg->length = ALIGN(sizeof(u64) * n, 64); @@ -689,21 +706,24 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, wr->num_sge = 0; wr->opcode = MLX5_IB_WR_UMR; - wr->wr.fast_reg.page_list_len = n; - wr->wr.fast_reg.page_shift = page_shift; - wr->wr.fast_reg.rkey = key; - wr->wr.fast_reg.iova_start = virt_addr; - wr->wr.fast_reg.length = len; - wr->wr.fast_reg.access_flags = access_flags; - wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd; + + umrwr->npages = n; + umrwr->page_shift = page_shift; + umrwr->mkey = key; + umrwr->target.virt_addr = virt_addr; + umrwr->length = len; + umrwr->access_flags = access_flags; + umrwr->pd = pd; } static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, struct ib_send_wr *wr, u32 key) { - wr->send_flags = MLX5_IB_SEND_UMR_UNREG; + struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + + wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE; wr->opcode = MLX5_IB_WR_UMR; - wr->wr.fast_reg.rkey = key; + umrwr->mkey = key; } void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) @@ -739,7 +759,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct ib_send_wr wr, *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; - int size = sizeof(u64) * npages; + int size; + __be64 *mr_pas; + __be64 *pas; + dma_addr_t dma; int err = 0; int i; @@ -758,25 +781,31 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (!mr) return ERR_PTR(-EAGAIN); - mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); - if (!mr->pas) { + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the pas array, we allocate + * a little more. */ + size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); + mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); + if (!mr_pas) { err = -ENOMEM; goto free_mr; } - mlx5_ib_populate_pas(dev, umem, page_shift, - mr_align(mr->pas, MLX5_UMR_ALIGN), 1); + pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN); + mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); + /* Clear padding after the actual pages. */ + memset(pas + npages, 0, size - npages * sizeof(u64)); - mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, - DMA_TO_DEVICE); - if (dma_mapping_error(ddev, mr->dma)) { + dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { err = -ENOMEM; goto free_pas; } memset(&wr, 0, sizeof(wr)); wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); + prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift, + virt_addr, len, access_flags); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); @@ -796,12 +825,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->mmr.size = len; mr->mmr.pd = to_mpd(pd)->pdn; + mr->live = 1; + unmap_dma: up(&umrc->sem); - dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); free_pas: - kfree(mr->pas); + kfree(mr_pas); free_mr: if (err) { @@ -812,6 +843,128 @@ free_mr: return mr; } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, + int zap) +{ + struct mlx5_ib_dev *dev = mr->dev; + struct device *ddev = dev->ib_dev.dma_device; + struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; + struct ib_umem *umem = mr->umem; + int size; + __be64 *pas; + dma_addr_t dma; + struct ib_send_wr wr, *bad; + struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg; + struct ib_sge sg; + int err = 0; + const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64); + const int page_index_mask = page_index_alignment - 1; + size_t pages_mapped = 0; + size_t pages_to_map = 0; + size_t pages_iter = 0; + int use_emergency_buf = 0; + + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, + * so we need to align the offset and length accordingly */ + if (start_page_index & page_index_mask) { + npages += start_page_index & page_index_mask; + start_page_index &= ~page_index_mask; + } + + pages_to_map = ALIGN(npages, page_index_alignment); + + if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES) + return -EINVAL; + + size = sizeof(u64) * pages_to_map; + size = min_t(int, PAGE_SIZE, size); + /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim + * code, when we are called from an invalidation. The pas buffer must + * be 2k-aligned for Connect-IB. */ + pas = (__be64 *)get_zeroed_page(GFP_ATOMIC); + if (!pas) { + mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n"); + pas = mlx5_ib_update_mtt_emergency_buffer; + size = MLX5_UMR_MTT_MIN_CHUNK_SIZE; + use_emergency_buf = 1; + mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex); + memset(pas, 0, size); + } + pages_iter = size / sizeof(u64); + dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during MTT update.\n"); + err = -ENOMEM; + goto free_pas; + } + + for (pages_mapped = 0; + pages_mapped < pages_to_map && !err; + pages_mapped += pages_iter, start_page_index += pages_iter) { + dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); + + npages = min_t(size_t, + pages_iter, + ib_umem_num_pages(umem) - start_page_index); + + if (!zap) { + __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT, + start_page_index, npages, pas, + MLX5_IB_MTT_PRESENT); + /* Clear padding after the pages brought from the + * umem. */ + memset(pas + npages, 0, size - npages * sizeof(u64)); + } + + dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (u64)(unsigned long)&umr_context; + + sg.addr = dma; + sg.length = ALIGN(npages * sizeof(u64), + MLX5_UMR_MTT_ALIGNMENT); + sg.lkey = dev->umrc.mr->lkey; + + wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + MLX5_IB_SEND_UMR_UPDATE_MTT; + wr.sg_list = &sg; + wr.num_sge = 1; + wr.opcode = MLX5_IB_WR_UMR; + umrwr->npages = sg.length / sizeof(u64); + umrwr->page_shift = PAGE_SHIFT; + umrwr->mkey = mr->mmr.key; + umrwr->target.offset = start_page_index; + + mlx5_ib_init_umr_context(&umr_context); + down(&umrc->sem); + err = ib_post_send(umrc->qp, &wr, &bad); + if (err) { + mlx5_ib_err(dev, "UMR post send failed, err %d\n", err); + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_err(dev, "UMR completion failed, code %d\n", + umr_context.status); + err = -EFAULT; + } + } + up(&umrc->sem); + } + dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + +free_pas: + if (!use_emergency_buf) + free_page((unsigned long)pas); + else + mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex); + + return err; +} +#endif + static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, u64 length, struct ib_umem *umem, int npages, int page_shift, @@ -822,6 +975,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, struct mlx5_ib_mr *mr; int inlen; int err; + bool pg_cap = !!(dev->mdev->caps.gen.flags & + MLX5_DEV_CAP_FLAG_ON_DMND_PG); mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -833,8 +988,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, err = -ENOMEM; goto err_1; } - mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0); + mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, + pg_cap ? MLX5_IB_MTT_PRESENT : 0); + /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags + * in the page list submitted with the command. */ + in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0; in->seg.flags = convert_access(access_flags) | MLX5_ACCESS_MODE_MTT; in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); @@ -853,14 +1012,15 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, goto err_2; } mr->umem = umem; - mlx5_vfree(in); + mr->live = 1; + kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); return mr; err_2: - mlx5_vfree(in); + kvfree(in); err_1: kfree(mr); @@ -881,12 +1041,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int order; int err; - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n", - start, virt_addr, length); + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, virt_addr, length, access_flags); umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); if (IS_ERR(umem)) { - mlx5_ib_dbg(dev, "umem get failed\n"); + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); return (void *)umem; } @@ -907,6 +1067,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "cache empty for order %d", order); mr = NULL; } + } else if (access_flags & IB_ACCESS_ON_DEMAND) { + err = -EINVAL; + pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); + goto error; } if (!mr) @@ -922,16 +1086,51 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; mr->npages = npages; - spin_lock(&dev->mr_lock); - dev->mdev->priv.reg_pages += npages; - spin_unlock(&dev->mr_lock); + atomic_add(npages, &dev->mdev->priv.reg_pages); mr->ibmr.lkey = mr->mmr.key; mr->ibmr.rkey = mr->mmr.key; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (umem->odp_data) { + /* + * This barrier prevents the compiler from moving the + * setting of umem->odp_data->private to point to our + * MR, before reg_umr finished, to ensure that the MR + * initialization have finished before starting to + * handle invalidations. + */ + smp_wmb(); + mr->umem->odp_data->private = mr; + /* + * Make sure we will see the new + * umem->odp_data->private value in the invalidation + * routines, before we can get page faults on the + * MR. Page faults can happen once we put the MR in + * the tree, below this line. Without the barrier, + * there can be a fault handling and an invalidation + * before umem->odp_data->private == mr is visible to + * the invalidation handler. + */ + smp_wmb(); + } +#endif + return &mr->ibmr; error: + /* + * Destroy the umem *before* destroying the MR, to ensure we + * will not have any in-flight notifiers when destroying the + * MR. + * + * As the MR is completely invalid to begin with, and this + * error path is only taken if we can't push the mr entry into + * the pagefault tree, this is safe. + */ + ib_umem_release(umem); + /* Kill the MR, and return an error code. */ + clean_mr(mr); return ERR_PTR(err); } @@ -968,17 +1167,14 @@ error: return err; } -int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +static int clean_mr(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(ibmr->device); - struct mlx5_ib_mr *mr = to_mmr(ibmr); - struct ib_umem *umem = mr->umem; - int npages = mr->npages; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); int umred = mr->umred; int err; if (!umred) { - err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", mr->mmr.key, err); @@ -993,15 +1189,47 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) free_cached_mr(dev, mr); } - if (umem) { + if (!umred) + kfree(mr); + + return 0; +} + +int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int npages = mr->npages; + struct ib_umem *umem = mr->umem; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (umem && umem->odp_data) { + /* Prevent new page faults from succeeding */ + mr->live = 0; + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&dev->mr_srcu); + /* Destroy all page mappings */ + mlx5_ib_invalidate_range(umem, ib_umem_start(umem), + ib_umem_end(umem)); + /* + * We kill the umem before the MR for ODP, + * so that there will not be any invalidations in + * flight, looking at the *mr struct. + */ ib_umem_release(umem); - spin_lock(&dev->mr_lock); - dev->mdev->priv.reg_pages -= npages; - spin_unlock(&dev->mr_lock); + atomic_sub(npages, &dev->mdev->priv.reg_pages); + + /* Avoid double-freeing the umem. */ + umem = NULL; } +#endif - if (!umred) - kfree(mr); + clean_mr(mr); + + if (umem) { + ib_umem_release(umem); + atomic_sub(npages, &dev->mdev->priv.reg_pages); + } return 0; } @@ -1025,7 +1253,7 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, goto err_free; } - in->seg.status = 1 << 6; /* free */ + in->seg.status = MLX5_MKEY_STATUS_FREE; in->seg.xlt_oct_size = cpu_to_be32(ndescs); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); @@ -1110,7 +1338,7 @@ int mlx5_ib_destroy_mr(struct ib_mr *ibmr) kfree(mr->sig); } - err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", mr->mmr.key, err); @@ -1140,7 +1368,7 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, goto err_free; } - in->seg.status = 1 << 6; /* free */ + in->seg.status = MLX5_MKEY_STATUS_FREE; in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c new file mode 100644 index 000000000000..a2c541c4809a --- /dev/null +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -0,0 +1,798 @@ +/* + * Copyright (c) 2014 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_umem.h> +#include <rdma/ib_umem_odp.h> + +#include "mlx5_ib.h" + +#define MAX_PREFETCH_LEN (4*1024*1024U) + +/* Timeout in ms to wait for an active mmu notifier to complete when handling + * a pagefault. */ +#define MMU_NOTIFIER_TIMEOUT 1000 + +struct workqueue_struct *mlx5_ib_page_fault_wq; + +void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, + unsigned long end) +{ + struct mlx5_ib_mr *mr; + const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1; + u64 idx = 0, blk_start_idx = 0; + int in_block = 0; + u64 addr; + + if (!umem || !umem->odp_data) { + pr_err("invalidation called on NULL umem or non-ODP umem\n"); + return; + } + + mr = umem->odp_data->private; + + if (!mr || !mr->ibmr.pd) + return; + + start = max_t(u64, ib_umem_start(umem), start); + end = min_t(u64, ib_umem_end(umem), end); + + /* + * Iteration one - zap the HW's MTTs. The notifiers_count ensures that + * while we are doing the invalidation, no page fault will attempt to + * overwrite the same MTTs. Concurent invalidations might race us, + * but they will write 0s as well, so no difference in the end result. + */ + + for (addr = start; addr < end; addr += (u64)umem->page_size) { + idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; + /* + * Strive to write the MTTs in chunks, but avoid overwriting + * non-existing MTTs. The huristic here can be improved to + * estimate the cost of another UMR vs. the cost of bigger + * UMR. + */ + if (umem->odp_data->dma_list[idx] & + (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) { + if (!in_block) { + blk_start_idx = idx; + in_block = 1; + } + } else { + u64 umr_offset = idx & umr_block_mask; + + if (in_block && umr_offset == 0) { + mlx5_ib_update_mtt(mr, blk_start_idx, + idx - blk_start_idx, 1); + in_block = 0; + } + } + } + if (in_block) + mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1, + 1); + + /* + * We are now sure that the device will not access the + * memory. We can safely unmap it, and mark it as dirty if + * needed. + */ + + ib_umem_odp_unmap_dma_pages(umem, start, end); +} + +#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \ + if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name) \ + ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name; \ +} while (0) + +int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +{ + int err; + struct mlx5_odp_caps hw_caps; + struct ib_odp_caps *caps = &dev->odp_caps; + + memset(caps, 0, sizeof(*caps)); + + if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)) + return 0; + + err = mlx5_query_odp_caps(dev->mdev, &hw_caps); + if (err) + goto out; + + caps->general_caps = IB_ODP_SUPPORT; + COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.ud_odp_caps, + SEND); + COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, + SEND); + COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, + RECV); + COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, + WRITE); + COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, + READ); + +out: + return err; +} + +static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, + u32 key) +{ + u32 base_key = mlx5_base_mkey(key); + struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); + struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr); + + if (!mmr || mmr->key != key || !mr->live) + return NULL; + + return container_of(mmr, struct mlx5_ib_mr, mmr); +} + +static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault, + int error) { + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device); + int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn, + pfault->mpfault.flags, + error); + if (ret) + pr_err("Failed to resolve the page fault on QP 0x%x\n", + qp->mqp.qpn); +} + +/* + * Handle a single data segment in a page-fault WQE. + * + * Returns number of pages retrieved on success. The caller will continue to + * the next data segment. + * Can return the following error codes: + * -EAGAIN to designate a temporary error. The caller will abort handling the + * page fault and resolve it. + * -EFAULT when there's an error mapping the requested pages. The caller will + * abort the page fault handling and possibly move the QP to an error state. + * On other errors the QP should also be closed with an error. + */ +static int pagefault_single_data_segment(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault, + u32 key, u64 io_virt, size_t bcnt, + u32 *bytes_mapped) +{ + struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device); + int srcu_key; + unsigned int current_seq; + u64 start_idx; + int npages = 0, ret = 0; + struct mlx5_ib_mr *mr; + u64 access_mask = ODP_READ_ALLOWED_BIT; + + srcu_key = srcu_read_lock(&mib_dev->mr_srcu); + mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key); + /* + * If we didn't find the MR, it means the MR was closed while we were + * handling the ODP event. In this case we return -EFAULT so that the + * QP will be closed. + */ + if (!mr || !mr->ibmr.pd) { + pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n", + key); + ret = -EFAULT; + goto srcu_unlock; + } + if (!mr->umem->odp_data) { + pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", + key); + if (bytes_mapped) + *bytes_mapped += + (bcnt - pfault->mpfault.bytes_committed); + goto srcu_unlock; + } + if (mr->ibmr.pd != qp->ibqp.pd) { + pr_err("Page-fault with different PDs for QP and MR.\n"); + ret = -EFAULT; + goto srcu_unlock; + } + + current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq); + /* + * Ensure the sequence number is valid for some time before we call + * gup. + */ + smp_rmb(); + + /* + * Avoid branches - this code will perform correctly + * in all iterations (in iteration 2 and above, + * bytes_committed == 0). + */ + io_virt += pfault->mpfault.bytes_committed; + bcnt -= pfault->mpfault.bytes_committed; + + start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT; + + if (mr->umem->writable) + access_mask |= ODP_WRITE_ALLOWED_BIT; + npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt, + access_mask, current_seq); + if (npages < 0) { + ret = npages; + goto srcu_unlock; + } + + if (npages > 0) { + mutex_lock(&mr->umem->odp_data->umem_mutex); + if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) { + /* + * No need to check whether the MTTs really belong to + * this MR, since ib_umem_odp_map_dma_pages already + * checks this. + */ + ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0); + } else { + ret = -EAGAIN; + } + mutex_unlock(&mr->umem->odp_data->umem_mutex); + if (ret < 0) { + if (ret != -EAGAIN) + pr_err("Failed to update mkey page tables\n"); + goto srcu_unlock; + } + + if (bytes_mapped) { + u32 new_mappings = npages * PAGE_SIZE - + (io_virt - round_down(io_virt, PAGE_SIZE)); + *bytes_mapped += min_t(u32, new_mappings, bcnt); + } + } + +srcu_unlock: + if (ret == -EAGAIN) { + if (!mr->umem->odp_data->dying) { + struct ib_umem_odp *odp_data = mr->umem->odp_data; + unsigned long timeout = + msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); + + if (!wait_for_completion_timeout( + &odp_data->notifier_completion, + timeout)) { + pr_warn("timeout waiting for mmu notifier completion\n"); + } + } else { + /* The MR is being killed, kill the QP as well. */ + ret = -EFAULT; + } + } + srcu_read_unlock(&mib_dev->mr_srcu, srcu_key); + pfault->mpfault.bytes_committed = 0; + return ret ? ret : npages; +} + +/** + * Parse a series of data segments for page fault handling. + * + * @qp the QP on which the fault occurred. + * @pfault contains page fault information. + * @wqe points at the first data segment in the WQE. + * @wqe_end points after the end of the WQE. + * @bytes_mapped receives the number of bytes that the function was able to + * map. This allows the caller to decide intelligently whether + * enough memory was mapped to resolve the page fault + * successfully (e.g. enough for the next MTU, or the entire + * WQE). + * @total_wqe_bytes receives the total data size of this WQE in bytes (minus + * the committed bytes). + * + * Returns the number of pages loaded if positive, zero for an empty WQE, or a + * negative error code. + */ +static int pagefault_data_segments(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault, void *wqe, + void *wqe_end, u32 *bytes_mapped, + u32 *total_wqe_bytes, int receive_queue) +{ + int ret = 0, npages = 0; + u64 io_virt; + u32 key; + u32 byte_count; + size_t bcnt; + int inline_segment; + + /* Skip SRQ next-WQE segment. */ + if (receive_queue && qp->ibqp.srq) + wqe += sizeof(struct mlx5_wqe_srq_next_seg); + + if (bytes_mapped) + *bytes_mapped = 0; + if (total_wqe_bytes) + *total_wqe_bytes = 0; + + while (wqe < wqe_end) { + struct mlx5_wqe_data_seg *dseg = wqe; + + io_virt = be64_to_cpu(dseg->addr); + key = be32_to_cpu(dseg->lkey); + byte_count = be32_to_cpu(dseg->byte_count); + inline_segment = !!(byte_count & MLX5_INLINE_SEG); + bcnt = byte_count & ~MLX5_INLINE_SEG; + + if (inline_segment) { + bcnt = bcnt & MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK; + wqe += ALIGN(sizeof(struct mlx5_wqe_inline_seg) + bcnt, + 16); + } else { + wqe += sizeof(*dseg); + } + + /* receive WQE end of sg list. */ + if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY && + io_virt == 0) + break; + + if (!inline_segment && total_wqe_bytes) { + *total_wqe_bytes += bcnt - min_t(size_t, bcnt, + pfault->mpfault.bytes_committed); + } + + /* A zero length data segment designates a length of 2GB. */ + if (bcnt == 0) + bcnt = 1U << 31; + + if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) { + pfault->mpfault.bytes_committed -= + min_t(size_t, bcnt, + pfault->mpfault.bytes_committed); + continue; + } + + ret = pagefault_single_data_segment(qp, pfault, key, io_virt, + bcnt, bytes_mapped); + if (ret < 0) + break; + npages += ret; + } + + return ret < 0 ? ret : npages; +} + +/* + * Parse initiator WQE. Advances the wqe pointer to point at the + * scatter-gather list, and set wqe_end to the end of the WQE. + */ +static int mlx5_ib_mr_initiator_pfault_handler( + struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault, + void **wqe, void **wqe_end, int wqe_length) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device); + struct mlx5_wqe_ctrl_seg *ctrl = *wqe; + u16 wqe_index = pfault->mpfault.wqe.wqe_index; + unsigned ds, opcode; +#if defined(DEBUG) + u32 ctrl_wqe_index, ctrl_qpn; +#endif + + ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; + if (ds * MLX5_WQE_DS_UNITS > wqe_length) { + mlx5_ib_err(dev, "Unable to read the complete WQE. ds = 0x%x, ret = 0x%x\n", + ds, wqe_length); + return -EFAULT; + } + + if (ds == 0) { + mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n", + wqe_index, qp->mqp.qpn); + return -EFAULT; + } + +#if defined(DEBUG) + ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) & + MLX5_WQE_CTRL_WQE_INDEX_MASK) >> + MLX5_WQE_CTRL_WQE_INDEX_SHIFT; + if (wqe_index != ctrl_wqe_index) { + mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n", + wqe_index, qp->mqp.qpn, + ctrl_wqe_index); + return -EFAULT; + } + + ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >> + MLX5_WQE_CTRL_QPN_SHIFT; + if (qp->mqp.qpn != ctrl_qpn) { + mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n", + wqe_index, qp->mqp.qpn, + ctrl_qpn); + return -EFAULT; + } +#endif /* DEBUG */ + + *wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS; + *wqe += sizeof(*ctrl); + + opcode = be32_to_cpu(ctrl->opmod_idx_opcode) & + MLX5_WQE_CTRL_OPCODE_MASK; + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + switch (opcode) { + case MLX5_OPCODE_SEND: + case MLX5_OPCODE_SEND_IMM: + case MLX5_OPCODE_SEND_INVAL: + if (!(dev->odp_caps.per_transport_caps.rc_odp_caps & + IB_ODP_SUPPORT_SEND)) + goto invalid_transport_or_opcode; + break; + case MLX5_OPCODE_RDMA_WRITE: + case MLX5_OPCODE_RDMA_WRITE_IMM: + if (!(dev->odp_caps.per_transport_caps.rc_odp_caps & + IB_ODP_SUPPORT_WRITE)) + goto invalid_transport_or_opcode; + *wqe += sizeof(struct mlx5_wqe_raddr_seg); + break; + case MLX5_OPCODE_RDMA_READ: + if (!(dev->odp_caps.per_transport_caps.rc_odp_caps & + IB_ODP_SUPPORT_READ)) + goto invalid_transport_or_opcode; + *wqe += sizeof(struct mlx5_wqe_raddr_seg); + break; + default: + goto invalid_transport_or_opcode; + } + break; + case IB_QPT_UD: + switch (opcode) { + case MLX5_OPCODE_SEND: + case MLX5_OPCODE_SEND_IMM: + if (!(dev->odp_caps.per_transport_caps.ud_odp_caps & + IB_ODP_SUPPORT_SEND)) + goto invalid_transport_or_opcode; + *wqe += sizeof(struct mlx5_wqe_datagram_seg); + break; + default: + goto invalid_transport_or_opcode; + } + break; + default: +invalid_transport_or_opcode: + mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n", + qp->ibqp.qp_type, opcode); + return -EFAULT; + } + + return 0; +} + +/* + * Parse responder WQE. Advances the wqe pointer to point at the + * scatter-gather list, and set wqe_end to the end of the WQE. + */ +static int mlx5_ib_mr_responder_pfault_handler( + struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault, + void **wqe, void **wqe_end, int wqe_length) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device); + struct mlx5_ib_wq *wq = &qp->rq; + int wqe_size = 1 << wq->wqe_shift; + + if (qp->ibqp.srq) { + mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n"); + return -EFAULT; + } + + if (qp->wq_sig) { + mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n"); + return -EFAULT; + } + + if (wqe_size > wqe_length) { + mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n"); + return -EFAULT; + } + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + if (!(dev->odp_caps.per_transport_caps.rc_odp_caps & + IB_ODP_SUPPORT_RECV)) + goto invalid_transport_or_opcode; + break; + default: +invalid_transport_or_opcode: + mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n", + qp->ibqp.qp_type); + return -EFAULT; + } + + *wqe_end = *wqe + wqe_size; + + return 0; +} + +static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device); + int ret; + void *wqe, *wqe_end; + u32 bytes_mapped, total_wqe_bytes; + char *buffer = NULL; + int resume_with_error = 0; + u16 wqe_index = pfault->mpfault.wqe.wqe_index; + int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR; + + buffer = (char *)__get_free_page(GFP_KERNEL); + if (!buffer) { + mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); + resume_with_error = 1; + goto resolve_page_fault; + } + + ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, + PAGE_SIZE); + if (ret < 0) { + mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n", + -ret, wqe_index, qp->mqp.qpn); + resume_with_error = 1; + goto resolve_page_fault; + } + + wqe = buffer; + if (requestor) + ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe, + &wqe_end, ret); + else + ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe, + &wqe_end, ret); + if (ret < 0) { + resume_with_error = 1; + goto resolve_page_fault; + } + + if (wqe >= wqe_end) { + mlx5_ib_err(dev, "ODP fault on invalid WQE.\n"); + resume_with_error = 1; + goto resolve_page_fault; + } + + ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped, + &total_wqe_bytes, !requestor); + if (ret == -EAGAIN) { + goto resolve_page_fault; + } else if (ret < 0 || total_wqe_bytes > bytes_mapped) { + mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n", + -ret); + resume_with_error = 1; + goto resolve_page_fault; + } + +resolve_page_fault: + mlx5_ib_page_fault_resume(qp, pfault, resume_with_error); + mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n", + qp->mqp.qpn, resume_with_error, pfault->mpfault.flags); + + free_page((unsigned long)buffer); +} + +static int pages_in_range(u64 address, u32 length) +{ + return (ALIGN(address + length, PAGE_SIZE) - + (address & PAGE_MASK)) >> PAGE_SHIFT; +} + +static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault) +{ + struct mlx5_pagefault *mpfault = &pfault->mpfault; + u64 address; + u32 length; + u32 prefetch_len = mpfault->bytes_committed; + int prefetch_activated = 0; + u32 rkey = mpfault->rdma.r_key; + int ret; + + /* The RDMA responder handler handles the page fault in two parts. + * First it brings the necessary pages for the current packet + * (and uses the pfault context), and then (after resuming the QP) + * prefetches more pages. The second operation cannot use the pfault + * context and therefore uses the dummy_pfault context allocated on + * the stack */ + struct mlx5_ib_pfault dummy_pfault = {}; + + dummy_pfault.mpfault.bytes_committed = 0; + + mpfault->rdma.rdma_va += mpfault->bytes_committed; + mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed, + mpfault->rdma.rdma_op_len); + mpfault->bytes_committed = 0; + + address = mpfault->rdma.rdma_va; + length = mpfault->rdma.rdma_op_len; + + /* For some operations, the hardware cannot tell the exact message + * length, and in those cases it reports zero. Use prefetch + * logic. */ + if (length == 0) { + prefetch_activated = 1; + length = mpfault->rdma.packet_size; + prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len); + } + + ret = pagefault_single_data_segment(qp, pfault, rkey, address, length, + NULL); + if (ret == -EAGAIN) { + /* We're racing with an invalidation, don't prefetch */ + prefetch_activated = 0; + } else if (ret < 0 || pages_in_range(address, length) > ret) { + mlx5_ib_page_fault_resume(qp, pfault, 1); + return; + } + + mlx5_ib_page_fault_resume(qp, pfault, 0); + + /* At this point, there might be a new pagefault already arriving in + * the eq, switch to the dummy pagefault for the rest of the + * processing. We're still OK with the objects being alive as the + * work-queue is being fenced. */ + + if (prefetch_activated) { + ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey, + address, + prefetch_len, + NULL); + if (ret < 0) { + pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n", + ret, prefetch_activated, + qp->ibqp.qp_num, address, prefetch_len); + } + } +} + +void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault) +{ + u8 event_subtype = pfault->mpfault.event_subtype; + + switch (event_subtype) { + case MLX5_PFAULT_SUBTYPE_WQE: + mlx5_ib_mr_wqe_pfault_handler(qp, pfault); + break; + case MLX5_PFAULT_SUBTYPE_RDMA: + mlx5_ib_mr_rdma_pfault_handler(qp, pfault); + break; + default: + pr_warn("Invalid page fault event subtype: 0x%x\n", + event_subtype); + mlx5_ib_page_fault_resume(qp, pfault, 1); + break; + } +} + +static void mlx5_ib_qp_pfault_action(struct work_struct *work) +{ + struct mlx5_ib_pfault *pfault = container_of(work, + struct mlx5_ib_pfault, + work); + enum mlx5_ib_pagefault_context context = + mlx5_ib_get_pagefault_context(&pfault->mpfault); + struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp, + pagefaults[context]); + mlx5_ib_mr_pfault_handler(qp, pfault); +} + +void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) +{ + unsigned long flags; + + spin_lock_irqsave(&qp->disable_page_faults_lock, flags); + qp->disable_page_faults = 1; + spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags); + + /* + * Note that at this point, we are guarenteed that no more + * work queue elements will be posted to the work queue with + * the QP we are closing. + */ + flush_workqueue(mlx5_ib_page_fault_wq); +} + +void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) +{ + unsigned long flags; + + spin_lock_irqsave(&qp->disable_page_faults_lock, flags); + qp->disable_page_faults = 0; + spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags); +} + +static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp, + struct mlx5_pagefault *pfault) +{ + /* + * Note that we will only get one fault event per QP per context + * (responder/initiator, read/write), until we resolve the page fault + * with the mlx5_ib_page_fault_resume command. Since this function is + * called from within the work element, there is no risk of missing + * events. + */ + struct mlx5_ib_qp *mibqp = to_mibqp(qp); + enum mlx5_ib_pagefault_context context = + mlx5_ib_get_pagefault_context(pfault); + struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context]; + + qp_pfault->mpfault = *pfault; + + /* No need to stop interrupts here since we are in an interrupt */ + spin_lock(&mibqp->disable_page_faults_lock); + if (!mibqp->disable_page_faults) + queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work); + spin_unlock(&mibqp->disable_page_faults_lock); +} + +void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) +{ + int i; + + qp->disable_page_faults = 1; + spin_lock_init(&qp->disable_page_faults_lock); + + qp->mqp.pfault_handler = mlx5_ib_pfault_handler; + + for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i) + INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action); +} + +int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) +{ + int ret; + + ret = init_srcu_struct(&ibdev->mr_srcu); + if (ret) + return ret; + + return 0; +} + +void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) +{ + cleanup_srcu_struct(&ibdev->mr_srcu); +} + +int __init mlx5_ib_odp_init(void) +{ + mlx5_ib_page_fault_wq = + create_singlethread_workqueue("mlx5_ib_page_faults"); + if (!mlx5_ib_page_fault_wq) + return -ENOMEM; + + return 0; +} + +void mlx5_ib_odp_cleanup(void) +{ + destroy_workqueue(mlx5_ib_page_fault_wq); +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8c574b63d77b..be0cd358b080 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -70,15 +70,6 @@ static const u32 mlx5_ib_opcode[] = { [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, }; -struct umr_wr { - u64 virt_addr; - struct ib_pd *pd; - unsigned int page_shift; - unsigned int npages; - u32 length; - int access_flags; - u32 mkey; -}; static int is_qp0(enum ib_qp_type qp_type) { @@ -110,6 +101,77 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); } +/** + * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. + * + * @qp: QP to copy from. + * @send: copy from the send queue when non-zero, use the receive queue + * otherwise. + * @wqe_index: index to start copying from. For send work queues, the + * wqe_index is in units of MLX5_SEND_WQE_BB. + * For receive work queue, it is the number of work queue + * element in the queue. + * @buffer: destination buffer. + * @length: maximum number of bytes to copy. + * + * Copies at least a single WQE, but may copy more data. + * + * Return: the number of bytes copied, or an error code. + */ +int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, + void *buffer, u32 length) +{ + struct ib_device *ibdev = qp->ibqp.device; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq; + size_t offset; + size_t wq_end; + struct ib_umem *umem = qp->umem; + u32 first_copy_length; + int wqe_length; + int ret; + + if (wq->wqe_cnt == 0) { + mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n", + qp->ibqp.qp_type); + return -EINVAL; + } + + offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift); + wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift); + + if (send && length < sizeof(struct mlx5_wqe_ctrl_seg)) + return -EINVAL; + + if (offset > umem->length || + (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length)) + return -EINVAL; + + first_copy_length = min_t(u32, offset + length, wq_end) - offset; + ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length); + if (ret) + return ret; + + if (send) { + struct mlx5_wqe_ctrl_seg *ctrl = buffer; + int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; + + wqe_length = ds * MLX5_WQE_DS_UNITS; + } else { + wqe_length = 1 << wq->wqe_shift; + } + + if (wqe_length <= first_copy_length) + return first_copy_length; + + ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset, + wqe_length - first_copy_length); + if (ret) + return ret; + + return wqe_length; +} + static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) { struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; @@ -158,11 +220,13 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { + struct mlx5_general_caps *gen; int wqe_size; int wq_size; + gen = &dev->mdev->caps.gen; /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > dev->mdev->caps.max_wqes) + if (cap->max_recv_wr > gen->max_wqes) return -EINVAL; if (!has_rq) { @@ -182,10 +246,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / wqe_size; - if (wqe_size > dev->mdev->caps.max_rq_desc_sz) { + if (wqe_size > gen->max_rq_desc_sz) { mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", wqe_size, - dev->mdev->caps.max_rq_desc_sz); + gen->max_rq_desc_sz); return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); @@ -266,9 +330,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { + struct mlx5_general_caps *gen; int wqe_size; int wq_size; + gen = &dev->mdev->caps.gen; if (!attr->cap.max_send_wr) return 0; @@ -277,9 +343,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, if (wqe_size < 0) return wqe_size; - if (wqe_size > dev->mdev->caps.max_sq_desc_sz) { + if (wqe_size > gen->max_sq_desc_sz) { mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", - wqe_size, dev->mdev->caps.max_sq_desc_sz); + wqe_size, gen->max_sq_desc_sz); return -EINVAL; } @@ -292,9 +358,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; - if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) { + if (qp->sq.wqe_cnt > gen->max_wqes) { mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", - qp->sq.wqe_cnt, dev->mdev->caps.max_wqes); + qp->sq.wqe_cnt, gen->max_wqes); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); @@ -309,11 +375,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { + struct mlx5_general_caps *gen; int desc_sz = 1 << qp->sq.wqe_shift; - if (desc_sz > dev->mdev->caps.max_sq_desc_sz) { + gen = &dev->mdev->caps.gen; + if (desc_sz > gen->max_sq_desc_sz) { mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", - desc_sz, dev->mdev->caps.max_sq_desc_sz); + desc_sz, gen->max_sq_desc_sz); return -EINVAL; } @@ -325,9 +393,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, qp->sq.wqe_cnt = ucmd->sq_wqe_count; - if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) { + if (qp->sq.wqe_cnt > gen->max_wqes) { mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", - qp->sq.wqe_cnt, dev->mdev->caps.max_wqes); + qp->sq.wqe_cnt, gen->max_wqes); return -EINVAL; } @@ -641,7 +709,7 @@ err_unmap: mlx5_ib_db_unmap_user(context, &qp->db); err_free: - mlx5_vfree(*in); + kvfree(*in); err_umem: if (qp->umem) @@ -755,7 +823,7 @@ err_wrid: kfree(qp->rq.wrid); err_free: - mlx5_vfree(*in); + kvfree(*in); err_buf: mlx5_buf_free(dev->mdev, &qp->buf); @@ -803,16 +871,20 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_resources *devr = &dev->devr; struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; + struct mlx5_general_caps *gen; struct mlx5_ib_create_qp ucmd; int inlen = sizeof(*in); int err; + mlx5_ib_odp_create_qp(qp); + + gen = &dev->mdev->caps.gen; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { @@ -851,9 +923,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } - if (ucmd.sq_wqe_count > dev->mdev->caps.max_wqes) { + if (ucmd.sq_wqe_count > gen->max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, dev->mdev->caps.max_wqes); + ucmd.sq_wqe_count, gen->max_wqes); return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); @@ -963,7 +1035,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_create; } - mlx5_vfree(in); + kvfree(in); /* Hardware wants QPN written in big-endian order (after * shifting) for send doorbell. Precompute this value to save * a little bit when posting sends. @@ -980,7 +1052,7 @@ err_create: else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); - mlx5_vfree(in); + kvfree(in); return err; } @@ -1003,9 +1075,14 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv } } else { spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); } } else if (recv_cq) { spin_lock_irq(&recv_cq->lock); + __acquire(&send_cq->lock); + } else { + __acquire(&send_cq->lock); + __acquire(&recv_cq->lock); } } @@ -1025,10 +1102,15 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re spin_unlock_irq(&recv_cq->lock); } } else { + __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } } else if (recv_cq) { + __release(&send_cq->lock); spin_unlock_irq(&recv_cq->lock); + } else { + __release(&recv_cq->lock); + __release(&send_cq->lock); } } @@ -1080,11 +1162,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) return; - if (qp->state != IB_QPS_RESET) + if (qp->state != IB_QPS_RESET) { + mlx5_ib_qp_disable_pagefaults(qp); if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state), MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp)) mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", qp->mqp.qpn); + } get_cqs(qp, &send_cq, &recv_cq); @@ -1144,6 +1228,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { + struct mlx5_general_caps *gen; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; u16 xrcdn = 0; @@ -1161,11 +1246,12 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); } + gen = &dev->mdev->caps.gen; switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: case IB_QPT_XRC_INI: - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) { + if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) { mlx5_ib_dbg(dev, "XRC not supported\n"); return ERR_PTR(-ENOSYS); } @@ -1272,6 +1358,9 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { + struct mlx5_general_caps *gen; + + gen = &dev->mdev->caps.gen; if (rate == IB_RATE_PORT_CURRENT) { return 0; } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { @@ -1279,7 +1368,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) } else { while (rate != IB_RATE_2_5_GBPS && !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - dev->mdev->caps.stat_rate_support)) + gen->stat_rate_support)) --rate; } @@ -1290,8 +1379,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { + struct mlx5_general_caps *gen; int err; + gen = &dev->mdev->caps.gen; path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; @@ -1302,6 +1393,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->rlid = cpu_to_be16(ah->dlid); if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) { + pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, gen->port[port - 1].gid_table_len); + return -EINVAL; + } path->grh_mlid |= 1 << 7; path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; @@ -1317,22 +1413,6 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->static_rate = err; path->port = port; - if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) { - pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n", - ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len); - return -EINVAL; - } - - path->grh_mlid |= 1 << 7; - path->mgid_index = ah->grh.sgid_index; - path->hop_limit = ah->grh.hop_limit; - path->tclass_flowlabel = - cpu_to_be32((ah->grh.traffic_class << 20) | - (ah->grh.flow_label)); - memcpy(path->rgid, ah->grh.dgid.raw, 16); - } - if (attr_mask & IB_QP_TIMEOUT) path->ackto_lt = attr->timeout << 3; @@ -1492,6 +1572,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; + struct mlx5_general_caps *gen; struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; @@ -1500,6 +1581,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int mlx5_st; int err; + gen = &dev->mdev->caps.gen; in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) return -ENOMEM; @@ -1539,7 +1621,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = -EINVAL; goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev->caps.log_max_msg; + context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg; } if (attr_mask & IB_QP_DEST_QPN) @@ -1634,6 +1716,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (mlx5_st < 0) goto out; + /* If moving to a reset or error state, we must disable page faults on + * this QP and flush all current page faults. Otherwise a stale page + * fault may attempt to work on this QP after it is reset and moved + * again to RTS, and may cause the driver and the device to get out of + * sync. */ + if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && + (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) + mlx5_ib_qp_disable_pagefaults(qp); + optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; in->optparam = cpu_to_be32(optpar); @@ -1643,6 +1734,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (err) goto out; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + mlx5_ib_qp_enable_pagefaults(qp); + qp->state = new_state; if (attr_mask & IB_QP_ACCESS_FLAGS) @@ -1685,9 +1779,11 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; + struct mlx5_general_caps *gen; int err = -EINVAL; int port; + gen = &dev->mdev->caps.gen; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -1699,21 +1795,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->mdev->caps.num_ports)) + (attr->port_num == 0 || attr->port_num > gen->num_ports)) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= dev->mdev->caps.port[port - 1].pkey_table_len) + if (attr->pkey_index >= gen->port[port - 1].pkey_table_len) goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > dev->mdev->caps.max_ra_res_qp) + attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp)) goto out; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > dev->mdev->caps.max_ra_req_qp) + attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp)) goto out; if (cur_state == new_state && cur_state == IB_QPS_RESET) { @@ -1830,37 +1926,70 @@ static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = frwr_mkey_mask(); } +static __be64 get_umr_reg_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_PD | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_unreg_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_mtt_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { - struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg; - u64 mask; + struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; memset(umr, 0, sizeof(*umr)); + if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) + umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */ + else + umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */ + if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) { - umr->flags = 1 << 5; /* fail if not free */ umr->klm_octowords = get_klm_octo(umrwr->npages); - mask = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR | - MLX5_MKEY_MASK_PD | - MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_A | - MLX5_MKEY_MASK_FREE; - umr->mkey_mask = cpu_to_be64(mask); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) { + umr->mkey_mask = get_umr_update_mtt_mask(); + umr->bsf_octowords = get_klm_octo(umrwr->target.offset); + umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; + } else { + umr->mkey_mask = get_umr_reg_mr_mask(); + } } else { - umr->flags = 2 << 5; /* fail if free */ - mask = MLX5_MKEY_MASK_FREE; - umr->mkey_mask = cpu_to_be64(mask); + umr->mkey_mask = get_umr_unreg_mr_mask(); } if (!wr->num_sge) - umr->flags |= (1 << 7); /* inline */ + umr->flags |= MLX5_UMR_INLINE; } static u8 get_umr_flags(int acc) @@ -1877,7 +2006,7 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, { memset(seg, 0, sizeof(*seg)); if (li) { - seg->status = 1 << 6; + seg->status = MLX5_MKEY_STATUS_FREE; return; } @@ -1894,19 +2023,23 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) { + struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { - seg->status = 1 << 6; + seg->status = MLX5_MKEY_STATUS_FREE; return; } - seg->flags = convert_access(wr->wr.fast_reg.access_flags); - seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn); - seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); - seg->len = cpu_to_be64(wr->wr.fast_reg.length); - seg->log2_page_size = wr->wr.fast_reg.page_shift; + seg->flags = convert_access(umrwr->access_flags); + if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) { + seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + seg->start_addr = cpu_to_be64(umrwr->target.virt_addr); + } + seg->len = cpu_to_be64(umrwr->length); + seg->log2_page_size = umrwr->page_shift; seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | - mlx5_mkey_variant(wr->wr.fast_reg.rkey)); + mlx5_mkey_variant(umrwr->mkey)); } static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, @@ -2020,56 +2153,31 @@ static u8 bs_selector(int block_size) } } -static int format_selector(struct ib_sig_attrs *attr, - struct ib_sig_domain *domain, - int *selector) +static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, + struct mlx5_bsf_inl *inl) { + /* Valid inline section and allow BSF refresh */ + inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | + MLX5_BSF_REFRESH_DIF); + inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); + inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); + /* repeating block */ + inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; + inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? + MLX5_DIF_CRC : MLX5_DIF_IPCS; -#define FORMAT_DIF_NONE 0 -#define FORMAT_DIF_CRC_INC 8 -#define FORMAT_DIF_CRC_NO_INC 12 -#define FORMAT_DIF_CSUM_INC 13 -#define FORMAT_DIF_CSUM_NO_INC 14 + if (domain->sig.dif.ref_remap) + inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; - switch (domain->sig.dif.type) { - case IB_T10DIF_NONE: - /* No DIF */ - *selector = FORMAT_DIF_NONE; - break; - case IB_T10DIF_TYPE1: /* Fall through */ - case IB_T10DIF_TYPE2: - switch (domain->sig.dif.bg_type) { - case IB_T10DIF_CRC: - *selector = FORMAT_DIF_CRC_INC; - break; - case IB_T10DIF_CSUM: - *selector = FORMAT_DIF_CSUM_INC; - break; - default: - return 1; - } - break; - case IB_T10DIF_TYPE3: - switch (domain->sig.dif.bg_type) { - case IB_T10DIF_CRC: - *selector = domain->sig.dif.type3_inc_reftag ? - FORMAT_DIF_CRC_INC : - FORMAT_DIF_CRC_NO_INC; - break; - case IB_T10DIF_CSUM: - *selector = domain->sig.dif.type3_inc_reftag ? - FORMAT_DIF_CSUM_INC : - FORMAT_DIF_CSUM_NO_INC; - break; - default: - return 1; - } - break; - default: - return 1; + if (domain->sig.dif.app_escape) { + if (domain->sig.dif.ref_escape) + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; + else + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; } - return 0; + inl->dif_app_bitmask_check = + cpu_to_be16(domain->sig.dif.apptag_check_mask); } static int mlx5_set_bsf(struct ib_mr *sig_mr, @@ -2080,45 +2188,49 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, struct mlx5_bsf_basic *basic = &bsf->basic; struct ib_sig_domain *mem = &sig_attrs->mem; struct ib_sig_domain *wire = &sig_attrs->wire; - int ret, selector; memset(bsf, 0, sizeof(*bsf)); + + /* Basic + Extended + Inline */ + basic->bsf_size_sbs = 1 << 7; + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + basic->raw_data_size = cpu_to_be32(data_size); + + /* Memory domain */ switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_NONE: + break; case IB_SIG_TYPE_T10_DIF: - if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) - return -EINVAL; + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); + mlx5_fill_inl_bsf(mem, &bsf->m_inl); + break; + default: + return -EINVAL; + } - /* Input domain check byte mask */ - basic->check_byte_mask = sig_attrs->check_mask; + /* Wire domain */ + switch (sig_attrs->wire.sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && - mem->sig.dif.type == wire->sig.dif.type) { + mem->sig_type == wire->sig_type) { /* Same block structure */ - basic->bsf_size_sbs = 1 << 4; + basic->bsf_size_sbs |= 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) - basic->wire.copy_byte_mask |= 0xc0; + basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) - basic->wire.copy_byte_mask |= 0x30; + basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) - basic->wire.copy_byte_mask |= 0x0f; + basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; } else basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); - basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); - basic->raw_data_size = cpu_to_be32(data_size); - - ret = format_selector(sig_attrs, mem, &selector); - if (ret) - return -EINVAL; - basic->m_bfs_psv = cpu_to_be32(selector << 24 | - msig->psv_memory.psv_idx); - - ret = format_selector(sig_attrs, wire, &selector); - if (ret) - return -EINVAL; - basic->w_bfs_psv = cpu_to_be32(selector << 24 | - msig->psv_wire.psv_idx); + basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); + mlx5_fill_inl_bsf(wire, &bsf->w_inl); break; - default: return -EINVAL; } @@ -2317,20 +2429,21 @@ static int set_psv_wr(struct ib_sig_domain *domain, memset(psv_seg, 0, sizeof(*psv_seg)); psv_seg->psv_num = cpu_to_be32(psv_idx); switch (domain->sig_type) { + case IB_SIG_TYPE_NONE: + break; case IB_SIG_TYPE_T10_DIF: psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | domain->sig.dif.app_tag); psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); - - *seg += sizeof(*psv_seg); - *size += sizeof(*psv_seg) / 16; break; - default: pr_err("Bad signature type given.\n"); return 1; } + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + return 0; } @@ -2423,7 +2536,7 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, - struct ib_send_wr *wr, int *idx, + struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { int err = 0; @@ -2749,6 +2862,8 @@ out: if (bf->need_lock) spin_lock(&bf->lock); + else + __acquire(&bf->lock); /* TBD enable WC */ if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) { @@ -2765,6 +2880,8 @@ out: bf->offset ^= bf->buf_size; if (bf->need_lock) spin_unlock(&bf->lock); + else + __release(&bf->lock); } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -2893,7 +3010,8 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at memset(ib_ah_attr, 0, sizeof(*ib_ah_attr)); ib_ah_attr->port_num = path->port; - if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + if (ib_ah_attr->port_num == 0 || + ib_ah_attr->port_num > dev->caps.gen.num_ports) return; ib_ah_attr->sl = path->sl & 0xf; @@ -2924,6 +3042,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr int mlx5_state; int err = 0; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + /* + * Wait for any outstanding page faults, in case the user frees memory + * based upon this query's result. + */ + flush_workqueue(mlx5_ib_page_fault_wq); +#endif + mutex_lock(&qp->mutex); outb = kzalloc(sizeof(*outb), GFP_KERNEL); if (!outb) { @@ -3011,10 +3137,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_general_caps *gen; struct mlx5_ib_xrcd *xrcd; int err; - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) + gen = &dev->mdev->caps.gen; + if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 70bd131ba646..41fec66217dd 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -141,7 +141,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return 0; err_in: - mlx5_vfree(*in); + kvfree(*in); err_umem: ib_umem_release(srq->umem); @@ -209,7 +209,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return 0; err_in: - mlx5_vfree(*in); + kvfree(*in); err_buf: mlx5_buf_free(dev->mdev, &srq->buf); @@ -238,6 +238,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_general_caps *gen; struct mlx5_ib_srq *srq; int desc_size; int buf_size; @@ -247,11 +248,12 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int is_xrc; u32 flgs, xrcdn; + gen = &dev->mdev->caps.gen; /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= dev->mdev->caps.max_srq_wqes) { + if (init_attr->attr.max_wr >= gen->max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, - dev->mdev->caps.max_srq_wqes); + gen->max_srq_wqes); return ERR_PTR(-EINVAL); } @@ -304,7 +306,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); in->ctx.db_record = cpu_to_be64(srq->db.dma); err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen); - mlx5_vfree(in); + kvfree(in); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); goto err_usr_kern_srq; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index fef067c959fc..c0d0296e7a00 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2341,9 +2341,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u," " offset = %u, page size = %u.\n", (unsigned long int)start, (unsigned long int)virt, (u32)length, - region->offset, region->page_size); + ib_umem_offset(region), region->page_size); - skip_pages = ((u32)region->offset) >> 12; + skip_pages = ((u32)ib_umem_offset(region)) >> 12; if (ib_copy_from_udata(&req, udata, sizeof(req))) { ib_umem_release(region); @@ -2408,7 +2408,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, region_length -= skip_pages << 12; for (page_index = skip_pages; page_index < chunk_pages; page_index++) { skip_pages = 0; - if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length) + if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length) goto enough_pages; if ((page_count&0x01FF) == 0) { if (page_count >= 1024 * 512) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index ac02ce4e8040..f3cc8c9e65ae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -96,7 +96,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); union ib_gid sgid; - u8 zmac[ETH_ALEN]; if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); @@ -118,9 +117,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) goto av_conf_err; } - memset(&zmac, 0, ETH_ALEN); - if (pd->uctx && - memcmp(attr->dmac, &zmac, ETH_ALEN)) { + if (pd->uctx) { status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, attr->dmac, &attr->vlan_id); if (status) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index dd35ae558ae1..638bff1ffc6c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -348,11 +348,6 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len) return mqe; } -static void *ocrdma_alloc_mqe(void) -{ - return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); -} - static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q) { dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma); @@ -566,8 +561,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE); - cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE); + cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE); + cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE); cmd->async_cqid_ringsize = cq->id; cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << @@ -1189,10 +1184,10 @@ int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset) { struct ocrdma_rdma_stats_req *req = dev->stats_mem.va; struct ocrdma_mqe *mqe = &dev->stats_mem.mqe; - struct ocrdma_rdma_stats_resp *old_stats = NULL; + struct ocrdma_rdma_stats_resp *old_stats; int status; - old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL); + old_stats = kmalloc(sizeof(*old_stats), GFP_KERNEL); if (old_stats == NULL) return -ENOMEM; @@ -1235,10 +1230,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp; struct mgmt_hba_attribs *hba_attribs; - mqe = ocrdma_alloc_mqe(); + mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); if (!mqe) return status; - memset(mqe, 0, sizeof(*mqe)); dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp); dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev, @@ -2279,7 +2273,8 @@ mbx_err: static int ocrdma_set_av_params(struct ocrdma_qp *qp, struct ocrdma_modify_qp *cmd, - struct ib_qp_attr *attrs) + struct ib_qp_attr *attrs, + int attr_mask) { int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; @@ -2319,8 +2314,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); - vlan_id = ah_attr->vlan_id; - if (vlan_id && (vlan_id < 0x1000)) { + if (attr_mask & IB_QP_VID) { + vlan_id = attrs->vlan_id; cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; @@ -2347,7 +2342,7 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID; } if (attr_mask & IB_QP_AV) { - status = ocrdma_set_av_params(qp, cmd, attrs); + status = ocrdma_set_av_params(qp, cmd, attrs, attr_mask); if (status) return status; } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 256a06bc0b68..b0b2257b8e04 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -388,6 +388,15 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); } +static void ocrdma_add_default_sgid(struct ocrdma_dev *dev) +{ + /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */ + union ib_gid *sgid = &dev->sgid_tbl[0]; + + sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + ocrdma_get_guid(dev, &sgid->raw[8]); +} + static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev, struct net_device *net) { @@ -434,6 +443,7 @@ static void ocrdma_init_gid_table(struct ocrdma_dev *dev) rdma_vlan_dev_real_dev(net_dev) : net_dev; if (real_dev == dev->nic_info.netdev) { + ocrdma_add_default_sgid(dev); ocrdma_init_ipv4_gids(dev, net_dev); ocrdma_init_ipv6_gids(dev, net_dev); } @@ -646,8 +656,10 @@ static int __init ocrdma_init_module(void) return 0; err_be_reg: +#if IS_ENABLED(CONFIG_IPV6) ocrdma_unregister_inet6addr_notifier(); err_notifier6: +#endif ocrdma_unregister_inetaddr_notifier(); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 904989ec5eaa..4e036480c1a8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -28,8 +28,6 @@ #ifndef __OCRDMA_SLI_H__ #define __OCRDMA_SLI_H__ -#define Bit(_b) (1 << (_b)) - enum { OCRDMA_ASIC_GEN_SKH_R = 0x04, OCRDMA_ASIC_GEN_LANCER = 0x0B @@ -103,7 +101,7 @@ enum { QTYPE_MCCQ = 3 }; -#define OCRDMA_MAX_SGID (8) +#define OCRDMA_MAX_SGID 8 #define OCRDMA_MAX_QP 2048 #define OCRDMA_MAX_CQ 2048 @@ -128,33 +126,33 @@ enum { #define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ /* qid #2 msbits at 12-11 */ #define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1 -#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */ +#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT 16 /* bits 16 - 28 */ /* Rearm bit */ -#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */ +#define OCRDMA_DB_CQ_REARM_SHIFT 29 /* bit 29 */ /* solicited bit */ -#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */ +#define OCRDMA_DB_CQ_SOLICIT_SHIFT 31 /* bit 31 */ #define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */ #define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */ -#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* qid bits 9-13 at 11-15 */ +#define OCRDMA_EQ_ID_EXT_MASK_SHIFT 2 /* qid bits 9-13 at 11-15 */ /* Clear the interrupt for this eq */ -#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */ +#define OCRDMA_EQ_CLR_SHIFT 9 /* bit 9 */ /* Must be 1 */ -#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */ +#define OCRDMA_EQ_TYPE_SHIFT 10 /* bit 10 */ /* Number of event entries processed */ -#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */ +#define OCRDMA_NUM_EQE_SHIFT 16 /* bits 16 - 28 */ /* Rearm bit */ -#define OCRDMA_REARM_SHIFT (29) /* bit 29 */ +#define OCRDMA_REARM_SHIFT 29 /* bit 29 */ #define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */ /* Number of entries posted */ -#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */ +#define OCRDMA_MQ_NUM_MQE_SHIFT 16 /* bits 16 - 29 */ -#define OCRDMA_MIN_HPAGE_SIZE (4096) +#define OCRDMA_MIN_HPAGE_SIZE 4096 -#define OCRDMA_MIN_Q_PAGE_SIZE (4096) -#define OCRDMA_MAX_Q_PAGES (8) +#define OCRDMA_MIN_Q_PAGE_SIZE 4096 +#define OCRDMA_MAX_Q_PAGES 8 #define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C #define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF @@ -170,14 +168,14 @@ enum { # 6: 256K Bytes # 7: 512K Bytes */ -#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8) +#define OCRDMA_MAX_Q_PAGE_SIZE_CNT 8 #define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES) -#define MAX_OCRDMA_QP_PAGES (8) +#define MAX_OCRDMA_QP_PAGES 8 #define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE) -#define OCRDMA_CREATE_CQ_MAX_PAGES (4) -#define OCRDMA_DPP_CQE_SIZE (4) +#define OCRDMA_CREATE_CQ_MAX_PAGES 4 +#define OCRDMA_DPP_CQE_SIZE 4 #define OCRDMA_GEN2_MAX_CQE 1024 #define OCRDMA_GEN2_CQ_PAGE_SIZE 4096 @@ -238,7 +236,7 @@ struct ocrdma_mqe_sge { enum { OCRDMA_MQE_HDR_EMB_SHIFT = 0, - OCRDMA_MQE_HDR_EMB_MASK = Bit(0), + OCRDMA_MQE_HDR_EMB_MASK = BIT(0), OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3, OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT, OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24, @@ -292,7 +290,7 @@ struct ocrdma_pa { u32 hi; }; -#define MAX_OCRDMA_EQ_PAGES (8) +#define MAX_OCRDMA_EQ_PAGES 8 struct ocrdma_create_eq_req { struct ocrdma_mbx_hdr req; u32 num_pages; @@ -304,7 +302,7 @@ struct ocrdma_create_eq_req { }; enum { - OCRDMA_CREATE_EQ_VALID = Bit(29), + OCRDMA_CREATE_EQ_VALID = BIT(29), OCRDMA_CREATE_EQ_CNT_SHIFT = 26, OCRDMA_CREATE_CQ_DELAY_SHIFT = 13, }; @@ -314,7 +312,7 @@ struct ocrdma_create_eq_rsp { u32 vector_eqid; }; -#define OCRDMA_EQ_MINOR_OTHER (0x1) +#define OCRDMA_EQ_MINOR_OTHER 0x1 enum { OCRDMA_MCQE_STATUS_SHIFT = 0, @@ -322,13 +320,13 @@ enum { OCRDMA_MCQE_ESTATUS_SHIFT = 16, OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT, OCRDMA_MCQE_CONS_SHIFT = 27, - OCRDMA_MCQE_CONS_MASK = Bit(27), + OCRDMA_MCQE_CONS_MASK = BIT(27), OCRDMA_MCQE_CMPL_SHIFT = 28, - OCRDMA_MCQE_CMPL_MASK = Bit(28), + OCRDMA_MCQE_CMPL_MASK = BIT(28), OCRDMA_MCQE_AE_SHIFT = 30, - OCRDMA_MCQE_AE_MASK = Bit(30), + OCRDMA_MCQE_AE_MASK = BIT(30), OCRDMA_MCQE_VALID_SHIFT = 31, - OCRDMA_MCQE_VALID_MASK = Bit(31) + OCRDMA_MCQE_VALID_MASK = BIT(31) }; struct ocrdma_mcqe { @@ -339,13 +337,13 @@ struct ocrdma_mcqe { }; enum { - OCRDMA_AE_MCQE_QPVALID = Bit(31), + OCRDMA_AE_MCQE_QPVALID = BIT(31), OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF, - OCRDMA_AE_MCQE_CQVALID = Bit(31), + OCRDMA_AE_MCQE_CQVALID = BIT(31), OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF, - OCRDMA_AE_MCQE_VALID = Bit(31), - OCRDMA_AE_MCQE_AE = Bit(30), + OCRDMA_AE_MCQE_VALID = BIT(31), + OCRDMA_AE_MCQE_AE = BIT(30), OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16, OCRDMA_AE_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT, @@ -386,9 +384,9 @@ enum { OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT, OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30, - OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = BIT(30), OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31, - OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31) + OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = BIT(31) }; struct ocrdma_ae_mpa_mcqe { @@ -412,9 +410,9 @@ enum { OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT, OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30, - OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = BIT(30), OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31, - OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31) + OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = BIT(31) }; struct ocrdma_ae_qp_mcqe { @@ -449,9 +447,9 @@ enum OCRDMA_ASYNC_EVENT_TYPE { /* mailbox command request and responses */ enum { OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2, - OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2), + OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = BIT(2), OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3, - OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3), + OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = BIT(3), OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8, OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF << OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT, @@ -672,9 +670,9 @@ enum { OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF, OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12, - OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12), - OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14), - OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15), + OCRDMA_CREATE_CQ_COALESCWM_MASK = BIT(13) | BIT(12), + OCRDMA_CREATE_CQ_FLAGS_NODELAY = BIT(14), + OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = BIT(15), OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF, OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF @@ -687,8 +685,8 @@ enum { OCRDMA_CREATE_CQ_EQID_SHIFT = 22, OCRDMA_CREATE_CQ_CNT_SHIFT = 27, - OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29), - OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31), + OCRDMA_CREATE_CQ_FLAGS_VALID = BIT(29), + OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = BIT(31), OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID | OCRDMA_CREATE_CQ_FLAGS_EVENTABLE | OCRDMA_CREATE_CQ_FLAGS_NODELAY @@ -731,8 +729,8 @@ enum { OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22, OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16, OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16, - OCRDMA_CREATE_MQ_VALID = Bit(31), - OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) + OCRDMA_CREATE_MQ_VALID = BIT(31), + OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = BIT(0) }; struct ocrdma_create_mq_req { @@ -783,7 +781,7 @@ enum { OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16, OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19, OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29, - OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29), + OCRDMA_CREATE_QP_REQ_QPT_MASK = BIT(31) | BIT(30) | BIT(29), OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0, OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF, @@ -798,23 +796,23 @@ enum { OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT, OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0, - OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0), + OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = BIT(0), OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1, - OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1), + OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = BIT(1), OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2, - OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2), + OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = BIT(2), OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3, - OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3), + OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = BIT(3), OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4, - OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4), + OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = BIT(4), OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5, - OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5), + OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = BIT(5), OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6, - OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6), + OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = BIT(6), OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7, - OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = BIT(7), OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8, - OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = BIT(8), OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16, OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT, @@ -927,7 +925,7 @@ enum { OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF << OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT, - OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0), + OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = BIT(0), OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1, OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF << OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT, @@ -964,38 +962,38 @@ enum { OCRDMA_MODIFY_QP_ID_SHIFT = 0, OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF, - OCRDMA_QP_PARA_QPS_VALID = Bit(0), - OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1), - OCRDMA_QP_PARA_PKEY_VALID = Bit(2), - OCRDMA_QP_PARA_QKEY_VALID = Bit(3), - OCRDMA_QP_PARA_PMTU_VALID = Bit(4), - OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5), - OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6), - OCRDMA_QP_PARA_RRC_VALID = Bit(7), - OCRDMA_QP_PARA_RQPSN_VALID = Bit(8), - OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9), - OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10), - OCRDMA_QP_PARA_RNT_VALID = Bit(11), - OCRDMA_QP_PARA_SQPSN_VALID = Bit(12), - OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13), - OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14), - OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15), - OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16), - OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17), - OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18), - OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19), - OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20), - OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21), - OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22), - OCRDMA_QP_PARA_ZLKEY_EN_VALID = Bit(23), - OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24), - OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25), - OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26), - - OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0), - OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1), - OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2), - OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3) + OCRDMA_QP_PARA_QPS_VALID = BIT(0), + OCRDMA_QP_PARA_SQD_ASYNC_VALID = BIT(1), + OCRDMA_QP_PARA_PKEY_VALID = BIT(2), + OCRDMA_QP_PARA_QKEY_VALID = BIT(3), + OCRDMA_QP_PARA_PMTU_VALID = BIT(4), + OCRDMA_QP_PARA_ACK_TO_VALID = BIT(5), + OCRDMA_QP_PARA_RETRY_CNT_VALID = BIT(6), + OCRDMA_QP_PARA_RRC_VALID = BIT(7), + OCRDMA_QP_PARA_RQPSN_VALID = BIT(8), + OCRDMA_QP_PARA_MAX_IRD_VALID = BIT(9), + OCRDMA_QP_PARA_MAX_ORD_VALID = BIT(10), + OCRDMA_QP_PARA_RNT_VALID = BIT(11), + OCRDMA_QP_PARA_SQPSN_VALID = BIT(12), + OCRDMA_QP_PARA_DST_QPN_VALID = BIT(13), + OCRDMA_QP_PARA_MAX_WQE_VALID = BIT(14), + OCRDMA_QP_PARA_MAX_RQE_VALID = BIT(15), + OCRDMA_QP_PARA_SGE_SEND_VALID = BIT(16), + OCRDMA_QP_PARA_SGE_RECV_VALID = BIT(17), + OCRDMA_QP_PARA_SGE_WR_VALID = BIT(18), + OCRDMA_QP_PARA_INB_RDEN_VALID = BIT(19), + OCRDMA_QP_PARA_INB_WREN_VALID = BIT(20), + OCRDMA_QP_PARA_FLOW_LBL_VALID = BIT(21), + OCRDMA_QP_PARA_BIND_EN_VALID = BIT(22), + OCRDMA_QP_PARA_ZLKEY_EN_VALID = BIT(23), + OCRDMA_QP_PARA_FMR_EN_VALID = BIT(24), + OCRDMA_QP_PARA_INBAT_EN_VALID = BIT(25), + OCRDMA_QP_PARA_VLAN_EN_VALID = BIT(26), + + OCRDMA_MODIFY_QP_FLAGS_RD = BIT(0), + OCRDMA_MODIFY_QP_FLAGS_WR = BIT(1), + OCRDMA_MODIFY_QP_FLAGS_SEND = BIT(2), + OCRDMA_MODIFY_QP_FLAGS_ATOMIC = BIT(3) }; enum { @@ -1014,15 +1012,15 @@ enum { OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT, - OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0), - OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1), - OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2), - OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3), - OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4), + OCRDMA_QP_PARAMS_FLAGS_FMR_EN = BIT(0), + OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = BIT(1), + OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = BIT(2), + OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = BIT(3), + OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = BIT(4), OCRDMA_QP_PARAMS_STATE_SHIFT = 5, - OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7), - OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8), - OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9), + OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7), + OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8), + OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9), OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, @@ -1277,7 +1275,7 @@ struct ocrdma_alloc_pd { }; enum { - OCRDMA_ALLOC_PD_RSP_DPP = Bit(16), + OCRDMA_ALLOC_PD_RSP_DPP = BIT(16), OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20, OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF, }; @@ -1309,18 +1307,18 @@ enum { OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF, OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0, - OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0), + OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = BIT(0), OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1, - OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1), + OCRDMA_ALLOC_LKEY_FMR_MASK = BIT(1), OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2, - OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2), + OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = BIT(2), OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3, - OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3), + OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = BIT(3), OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4, - OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4), + OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = BIT(4), OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5, - OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5), - OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6), + OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = BIT(5), + OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = BIT(6), OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6, OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16, OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF << @@ -1379,21 +1377,21 @@ enum { OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF << OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT, OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24, - OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24), + OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = BIT(24), OCRDMA_REG_NSMR_ZB_SHIFT = 25, - OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25), + OCRDMA_REG_NSMR_ZB_SHIFT_MASK = BIT(25), OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26, - OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26), + OCRDMA_REG_NSMR_REMOTE_INV_MASK = BIT(26), OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27, - OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27), + OCRDMA_REG_NSMR_REMOTE_WR_MASK = BIT(27), OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28, - OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28), + OCRDMA_REG_NSMR_REMOTE_RD_MASK = BIT(28), OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29, - OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29), + OCRDMA_REG_NSMR_LOCAL_WR_MASK = BIT(29), OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30, - OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30), + OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = BIT(30), OCRDMA_REG_NSMR_LAST_SHIFT = 31, - OCRDMA_REG_NSMR_LAST_MASK = Bit(31) + OCRDMA_REG_NSMR_LAST_MASK = BIT(31) }; struct ocrdma_reg_nsmr { @@ -1420,7 +1418,7 @@ enum { OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT, OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31, - OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31) + OCRDMA_REG_NSMR_CONT_LAST_MASK = BIT(31) }; struct ocrdma_reg_nsmr_cont { @@ -1566,7 +1564,7 @@ struct ocrdma_delete_ah_tbl_rsp { enum { OCRDMA_EQE_VALID_SHIFT = 0, - OCRDMA_EQE_VALID_MASK = Bit(0), + OCRDMA_EQE_VALID_MASK = BIT(0), OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE, OCRDMA_EQE_RESOURCE_ID_SHIFT = 16, OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF << @@ -1624,11 +1622,11 @@ enum { OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT, OCRDMA_CQE_STATUS_SHIFT = 16, OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT, - OCRDMA_CQE_VALID = Bit(31), - OCRDMA_CQE_INVALIDATE = Bit(30), - OCRDMA_CQE_QTYPE = Bit(29), - OCRDMA_CQE_IMM = Bit(28), - OCRDMA_CQE_WRITE_IMM = Bit(27), + OCRDMA_CQE_VALID = BIT(31), + OCRDMA_CQE_INVALIDATE = BIT(30), + OCRDMA_CQE_QTYPE = BIT(29), + OCRDMA_CQE_IMM = BIT(28), + OCRDMA_CQE_WRITE_IMM = BIT(27), OCRDMA_CQE_QTYPE_SQ = 0, OCRDMA_CQE_QTYPE_RQ = 1, OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF @@ -1772,8 +1770,8 @@ struct ocrdma_grh { u16 rsvd; } __packed; -#define OCRDMA_AV_VALID Bit(7) -#define OCRDMA_AV_VLAN_VALID Bit(1) +#define OCRDMA_AV_VALID BIT(7) +#define OCRDMA_AV_VLAN_VALID BIT(1) struct ocrdma_av { struct ocrdma_eth_vlan eth_hdr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 8f5f2577f288..fb8d8c4dfbb9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -388,7 +388,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, memset(&resp, 0, sizeof(resp)); resp.ah_tbl_len = ctx->ah_tbl.len; - resp.ah_tbl_page = ctx->ah_tbl.pa; + resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va); status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); if (status) @@ -805,7 +805,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, goto umem_err; mr->hwmr.pbe_size = mr->umem->page_size; - mr->hwmr.fbo = mr->umem->offset; + mr->hwmr.fbo = ib_umem_offset(mr->umem); mr->hwmr.va = usr_addr; mr->hwmr.len = len; mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; @@ -870,7 +870,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, uresp.page_size = PAGE_ALIGN(cq->len); uresp.num_pages = 1; uresp.max_hw_cqe = cq->max_hw_cqe; - uresp.page_addr[0] = cq->pa; + uresp.page_addr[0] = virt_to_phys(cq->va); uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id); uresp.db_page_size = dev->nic_info.db_page_size; uresp.phase_change = cq->phase_change ? 1 : 0; @@ -1123,13 +1123,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, uresp.sq_dbid = qp->sq.dbid; uresp.num_sq_pages = 1; uresp.sq_page_size = PAGE_ALIGN(qp->sq.len); - uresp.sq_page_addr[0] = qp->sq.pa; + uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va); uresp.num_wqe_allocated = qp->sq.max_cnt; if (!srq) { uresp.rq_dbid = qp->rq.dbid; uresp.num_rq_pages = 1; uresp.rq_page_size = PAGE_ALIGN(qp->rq.len); - uresp.rq_page_addr[0] = qp->rq.pa; + uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va); uresp.num_rqe_allocated = qp->rq.max_cnt; } uresp.db_page_addr = usr_db; @@ -1410,6 +1410,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp, mutex_unlock(&dev->dev_lock); if (status) goto mbx_err; + if (qp->qp_type == IB_QPT_UD) + qp_attr->qkey = params.qkey; qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT); qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT); qp_attr->path_mtu = @@ -1680,7 +1682,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, memset(&uresp, 0, sizeof(uresp)); uresp.rq_dbid = srq->rq.dbid; uresp.num_rq_pages = 1; - uresp.rq_page_addr[0] = srq->rq.pa; + uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va); uresp.rq_page_size = srq->rq.len; uresp.db_page_addr = dev->nic_info.unmapped_db + (srq->pd->id * dev->nic_info.db_page_size); @@ -1870,7 +1872,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge); if (unlikely(hdr->total_len > qp->max_inline_data)) { pr_err("%s() supported_len=0x%x,\n" - " unspported len req=0x%x\n", __func__, + " unsupported len req=0x%x\n", __func__, qp->max_inline_data, hdr->total_len); return -EINVAL; } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index cab610ccd50e..81854586c081 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -89,7 +89,6 @@ static int create_file(const char *name, umode_t mode, { int error; - *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 9bbb55347cc1..a77fb4fb14e4 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -258,7 +258,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.user_base = start; mr->mr.iova = virt_addr; mr->mr.length = length; - mr->mr.offset = umem->offset; + mr->mr.offset = ib_umem_offset(umem); mr->mr.access_flags = mr_access_flags; mr->umem = umem; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index f8dfd76be89f..db3588df3546 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -511,7 +511,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, usnic_ib_qp_grp_state_to_string(old_state), usnic_ib_qp_grp_state_to_string(new_state)); } else { - usnic_err("Failed to transistion %u from %s to %s", + usnic_err("Failed to transition %u from %s to %s", qp_grp->grp_id, usnic_ib_qp_grp_state_to_string(old_state), usnic_ib_qp_grp_state_to_string(new_state)); diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 801a1d6937e4..417de1f32960 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -507,7 +507,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) if (err) goto out_free_dev; - if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) { + if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) { usnic_err("IOMMU of %s does not support cache coherency\n", dev_name(dev)); err = -EINVAL; |