path: root/drivers/infiniband/hw
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.c   |   2
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c               | 213
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c               |   8
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c           |   4
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c              |  62
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c         |   8
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c               |  28
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.c         |   2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c         |   1
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mr.c         |   2
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c                |   5
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c              |  15
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c                |   1
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c                |  13
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile            |   1
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c                |  16
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c               |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c              | 136
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c               |  87
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h           | 116
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c                | 336
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c               | 798
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c                | 438
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c               |  12
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c          |   6
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c       |   5
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c       |  25
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c     |  12
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_sli.h      | 238
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c    |  16
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c             |   1
-rw-r--r--  drivers/infiniband/hw/qib/qib_mr.c             |   2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c  |   2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c       |   2
34 files changed, 2021 insertions(+), 594 deletions(-)
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 2d5cbf4363e4..bdf3507810cb 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -476,7 +476,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
c2mr->umem->page_size,
i,
length,
- c2mr->umem->offset,
+ ib_umem_offset(c2mr->umem),
&kva,
c2_convert_access(acc),
c2mr);
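
[Note: the hunk above (and the matching ehca/ipath hunks later in this series) replaces direct reads of umem->offset with the ib_umem_offset() accessor introduced alongside on-demand paging. A minimal sketch of the assumed helper, which reports where the mapping starts within its first page; the exact definition lives in include/rdma/ib_umem.h:]

static inline int ib_umem_offset(struct ib_umem *umem)
{
	/* offset of the user virtual address within a umem page */
	return umem->address & ((unsigned long)umem->page_size - 1);
}
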
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index c2fb71c182a8..9edc200b311d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -236,10 +236,12 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
static void set_emss(struct c4iw_ep *ep, u16 opt)
{
ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
- sizeof(struct iphdr) - sizeof(struct tcphdr);
+ ((AF_INET == ep->com.remote_addr.ss_family) ?
+ sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
+ sizeof(struct tcphdr);
ep->mss = ep->emss;
if (GET_TCPOPT_TSTAMP(opt))
- ep->emss -= 12;
+ ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
if (ep->emss < 128)
ep->emss = 128;
if (ep->emss & 7)
@@ -415,6 +417,7 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
return NULL;
if (!our_interface(dev, n->dev) &&
!(n->dev->flags & IFF_LOOPBACK)) {
+ neigh_release(n);
dst_release(&rt->dst);
return NULL;
}
@@ -469,13 +472,13 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
skb = get_skb(skb, flowclen, GFP_KERNEL);
flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
- flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) |
- FW_FLOWC_WR_NPARAMS(8));
- flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen,
- 16)) | FW_WR_FLOWID(ep->hwtid));
+ flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
+ FW_FLOWC_WR_NPARAMS_V(8));
+ flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
+ 16)) | FW_WR_FLOWID_V(ep->hwtid));
flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
- flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN
+ flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
(ep->com.dev->rdev.lldi.pf));
flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
@@ -581,11 +584,14 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep,
}
static void best_mtu(const unsigned short *mtus, unsigned short mtu,
- unsigned int *idx, int use_ts)
+ unsigned int *idx, int use_ts, int ipv6)
{
- unsigned short hdr_size = sizeof(struct iphdr) +
+ unsigned short hdr_size = (ipv6 ?
+ sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr)) +
sizeof(struct tcphdr) +
- (use_ts ? 12 : 0);
+ (use_ts ?
+ round_up(TCPOLEN_TIMESTAMP, 4) : 0);
unsigned short data_size = mtu - hdr_size;
cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
@@ -634,7 +640,8 @@ static int send_connect(struct c4iw_ep *ep)
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps);
+ enable_tcp_timestamps,
+ (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
wscale = compute_wscale(rcv_win);
/*
@@ -642,32 +649,33 @@ static int send_connect(struct c4iw_ep *ep)
* remainder will be specified in the rx_data_ack.
*/
win = ep->rcv_win >> 10;
- if (win > RCV_BUFSIZ_MASK)
- win = RCV_BUFSIZ_MASK;
+ if (win > RCV_BUFSIZ_M)
+ win = RCV_BUFSIZ_M;
opt0 = (nocong ? NO_CONG(1) : 0) |
- KEEP_ALIVE(1) |
+ KEEP_ALIVE_F |
DELACK(1) |
- WND_SCALE(wscale) |
- MSS_IDX(mtu_idx) |
- L2T_IDX(ep->l2t->idx) |
- TX_CHAN(ep->tx_chan) |
- SMAC_SEL(ep->smac_idx) |
+ WND_SCALE_V(wscale) |
+ MSS_IDX_V(mtu_idx) |
+ L2T_IDX_V(ep->l2t->idx) |
+ TX_CHAN_V(ep->tx_chan) |
+ SMAC_SEL_V(ep->smac_idx) |
DSCP(ep->tos) |
- ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(win);
- opt2 = RX_CHANNEL(0) |
+ ULP_MODE_V(ULP_MODE_TCPDDP) |
+ RCV_BUFSIZ_V(win);
+ opt2 = RX_CHANNEL_V(0) |
CCTRL_ECN(enable_ecn) |
- RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
+ RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
if (enable_tcp_timestamps)
opt2 |= TSTAMPS_EN(1);
if (enable_tcp_sack)
opt2 |= SACK_EN(1);
if (wscale && enable_tcp_window_scaling)
- opt2 |= WND_SCALE_EN(1);
+ opt2 |= WND_SCALE_EN_F;
if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
- opt2 |= T5_OPT_2_VALID;
+ opt2 |= T5_OPT_2_VALID_F;
opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
}
t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
@@ -713,8 +721,6 @@ static int send_connect(struct c4iw_ep *ep)
} else {
u32 isn = (prandom_u32() & ~7UL) - 1;
- opt2 |= T5_OPT_2_VALID;
- opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
if (peer2peer)
isn += 4;
@@ -730,7 +736,7 @@ static int send_connect(struct c4iw_ep *ep)
t5_req->local_ip = la->sin_addr.s_addr;
t5_req->peer_ip = ra->sin_addr.s_addr;
t5_req->opt0 = cpu_to_be64(opt0);
- t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
+ t5_req->params = cpu_to_be64(FILTER_TUPLE_V(
cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t)));
@@ -756,10 +762,10 @@ static int send_connect(struct c4iw_ep *ep)
t5_req6->peer_ip_lo = *((__be64 *)
(ra6->sin6_addr.s6_addr + 8));
t5_req6->opt0 = cpu_to_be64(opt0);
- t5_req6->params = (__force __be64)cpu_to_be32(
+ t5_req6->params = cpu_to_be64(FILTER_TUPLE_V(
cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ ep->l2t)));
t5_req6->rsvd = cpu_to_be32(isn);
PDBG("%s snd_isn %u\n", __func__,
be32_to_cpu(t5_req6->rsvd));
@@ -797,16 +803,16 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
memset(req, 0, wrlen);
req->op_to_immdlen = cpu_to_be32(
- FW_WR_OP(FW_OFLD_TX_DATA_WR) |
- FW_WR_COMPL(1) |
- FW_WR_IMMDLEN(mpalen));
+ FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
+ FW_WR_COMPL_F |
+ FW_WR_IMMDLEN_V(mpalen));
req->flowid_len16 = cpu_to_be32(
- FW_WR_FLOWID(ep->hwtid) |
- FW_WR_LEN16(wrlen >> 4));
+ FW_WR_FLOWID_V(ep->hwtid) |
+ FW_WR_LEN16_V(wrlen >> 4));
req->plen = cpu_to_be32(mpalen);
req->tunnel_to_proxy = cpu_to_be32(
- FW_OFLD_TX_DATA_WR_FLUSH(1) |
- FW_OFLD_TX_DATA_WR_SHOVE(1));
+ FW_OFLD_TX_DATA_WR_FLUSH_F |
+ FW_OFLD_TX_DATA_WR_SHOVE_F);
mpa = (struct mpa_message *)(req + 1);
memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
@@ -891,16 +897,16 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
memset(req, 0, wrlen);
req->op_to_immdlen = cpu_to_be32(
- FW_WR_OP(FW_OFLD_TX_DATA_WR) |
- FW_WR_COMPL(1) |
- FW_WR_IMMDLEN(mpalen));
+ FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
+ FW_WR_COMPL_F |
+ FW_WR_IMMDLEN_V(mpalen));
req->flowid_len16 = cpu_to_be32(
- FW_WR_FLOWID(ep->hwtid) |
- FW_WR_LEN16(wrlen >> 4));
+ FW_WR_FLOWID_V(ep->hwtid) |
+ FW_WR_LEN16_V(wrlen >> 4));
req->plen = cpu_to_be32(mpalen);
req->tunnel_to_proxy = cpu_to_be32(
- FW_OFLD_TX_DATA_WR_FLUSH(1) |
- FW_OFLD_TX_DATA_WR_SHOVE(1));
+ FW_OFLD_TX_DATA_WR_FLUSH_F |
+ FW_OFLD_TX_DATA_WR_SHOVE_F);
mpa = (struct mpa_message *)(req + 1);
memset(mpa, 0, sizeof(*mpa));
@@ -971,16 +977,16 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
req->op_to_immdlen = cpu_to_be32(
- FW_WR_OP(FW_OFLD_TX_DATA_WR) |
- FW_WR_COMPL(1) |
- FW_WR_IMMDLEN(mpalen));
+ FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
+ FW_WR_COMPL_F |
+ FW_WR_IMMDLEN_V(mpalen));
req->flowid_len16 = cpu_to_be32(
- FW_WR_FLOWID(ep->hwtid) |
- FW_WR_LEN16(wrlen >> 4));
+ FW_WR_FLOWID_V(ep->hwtid) |
+ FW_WR_LEN16_V(wrlen >> 4));
req->plen = cpu_to_be32(mpalen);
req->tunnel_to_proxy = cpu_to_be32(
- FW_OFLD_TX_DATA_WR_FLUSH(1) |
- FW_OFLD_TX_DATA_WR_SHOVE(1));
+ FW_OFLD_TX_DATA_WR_FLUSH_F |
+ FW_OFLD_TX_DATA_WR_SHOVE_F);
mpa = (struct mpa_message *)(req + 1);
memset(mpa, 0, sizeof(*mpa));
@@ -1243,15 +1249,15 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
* due to the limit in the number of bits in the RCV_BUFSIZ field,
* then add the overage in to the credits returned.
*/
- if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024)
- credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024;
+ if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
+ credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
ep->hwtid));
- req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) |
+ req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
F_RX_DACK_CHANGE |
V_RX_DACK_MODE(dack_mode));
set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
@@ -1634,7 +1640,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
__state_set(&ep->com, MPA_REQ_RCVD);
/* drive upcall */
- mutex_lock(&ep->parent_ep->com.mutex);
+ mutex_lock_nested(&ep->parent_ep->com.mutex,
+ SINGLE_DEPTH_NESTING);
if (ep->parent_ep->com.state != DEAD) {
if (connect_request_upcall(ep))
abort_connection(ep, skb, GFP_KERNEL);
@@ -1745,7 +1752,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
memset(req, 0, sizeof(*req));
req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR));
- req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
+ req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
@@ -1756,14 +1763,15 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
req->le.pport = sin->sin_port;
req->le.u.ipv4.pip = sin->sin_addr.s_addr;
req->tcb.t_state_to_astid =
- htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) |
- V_FW_OFLD_CONNECTION_WR_ASTID(atid));
+ htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
+ FW_OFLD_CONNECTION_WR_ASTID_V(atid));
req->tcb.cplrxdataack_cplpassacceptrpl =
- htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
+ htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps);
+ enable_tcp_timestamps,
+ (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
wscale = compute_wscale(rcv_win);
/*
@@ -1771,34 +1779,34 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
* remainder will be specified in the rx_data_ack.
*/
win = ep->rcv_win >> 10;
- if (win > RCV_BUFSIZ_MASK)
- win = RCV_BUFSIZ_MASK;
+ if (win > RCV_BUFSIZ_M)
+ win = RCV_BUFSIZ_M;
req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) |
(nocong ? NO_CONG(1) : 0) |
- KEEP_ALIVE(1) |
+ KEEP_ALIVE_F |
DELACK(1) |
- WND_SCALE(wscale) |
- MSS_IDX(mtu_idx) |
- L2T_IDX(ep->l2t->idx) |
- TX_CHAN(ep->tx_chan) |
- SMAC_SEL(ep->smac_idx) |
+ WND_SCALE_V(wscale) |
+ MSS_IDX_V(mtu_idx) |
+ L2T_IDX_V(ep->l2t->idx) |
+ TX_CHAN_V(ep->tx_chan) |
+ SMAC_SEL_V(ep->smac_idx) |
DSCP(ep->tos) |
- ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(win));
+ ULP_MODE_V(ULP_MODE_TCPDDP) |
+ RCV_BUFSIZ_V(win));
req->tcb.opt2 = (__force __be32) (PACE(1) |
TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
- RX_CHANNEL(0) |
+ RX_CHANNEL_V(0) |
CCTRL_ECN(enable_ecn) |
- RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid));
+ RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
if (enable_tcp_timestamps)
- req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1);
+ req->tcb.opt2 |= (__force __be32)TSTAMPS_EN(1);
if (enable_tcp_sack)
- req->tcb.opt2 |= (__force __be32) SACK_EN(1);
+ req->tcb.opt2 |= (__force __be32)SACK_EN(1);
if (wscale && enable_tcp_window_scaling)
- req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1);
- req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0);
- req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2);
+ req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
+ req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
+ req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
set_bit(ACT_OFLD_CONN, &ep->com.history);
c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
@@ -2162,7 +2170,8 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
ep->hwtid));
best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps && req->tcpopt.tstamp);
+ enable_tcp_timestamps && req->tcpopt.tstamp,
+ (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
wscale = compute_wscale(rcv_win);
/*
@@ -2170,28 +2179,28 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
* remainder will be specified in the rx_data_ack.
*/
win = ep->rcv_win >> 10;
- if (win > RCV_BUFSIZ_MASK)
- win = RCV_BUFSIZ_MASK;
+ if (win > RCV_BUFSIZ_M)
+ win = RCV_BUFSIZ_M;
opt0 = (nocong ? NO_CONG(1) : 0) |
- KEEP_ALIVE(1) |
+ KEEP_ALIVE_F |
DELACK(1) |
- WND_SCALE(wscale) |
- MSS_IDX(mtu_idx) |
- L2T_IDX(ep->l2t->idx) |
- TX_CHAN(ep->tx_chan) |
- SMAC_SEL(ep->smac_idx) |
+ WND_SCALE_V(wscale) |
+ MSS_IDX_V(mtu_idx) |
+ L2T_IDX_V(ep->l2t->idx) |
+ TX_CHAN_V(ep->tx_chan) |
+ SMAC_SEL_V(ep->smac_idx) |
DSCP(ep->tos >> 2) |
- ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(win);
- opt2 = RX_CHANNEL(0) |
- RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
+ ULP_MODE_V(ULP_MODE_TCPDDP) |
+ RCV_BUFSIZ_V(win);
+ opt2 = RX_CHANNEL_V(0) |
+ RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
if (enable_tcp_timestamps && req->tcpopt.tstamp)
opt2 |= TSTAMPS_EN(1);
if (enable_tcp_sack && req->tcpopt.sack)
opt2 |= SACK_EN(1);
if (wscale && enable_tcp_window_scaling)
- opt2 |= WND_SCALE_EN(1);
+ opt2 |= WND_SCALE_EN_F;
if (enable_ecn) {
const struct tcphdr *tcph;
u32 hlen = ntohl(req->hdr_len);
@@ -2203,7 +2212,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
}
if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
u32 isn = (prandom_u32() & ~7UL) - 1;
- opt2 |= T5_OPT_2_VALID;
+ opt2 |= T5_OPT_2_VALID_F;
opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
rpl5 = (void *)rpl;
@@ -3118,6 +3127,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
err = c4iw_wait_for_reply(&ep->com.dev->rdev,
&ep->com.wr_wait,
0, 0, __func__);
+ else if (err > 0)
+ err = net_xmit_errno(err);
if (err)
pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
err, ep->stid,
@@ -3151,6 +3162,8 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
err = c4iw_wait_for_reply(&ep->com.dev->rdev,
&ep->com.wr_wait,
0, 0, __func__);
+ else if (err > 0)
+ err = net_xmit_errno(err);
}
if (err)
pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
@@ -3529,9 +3542,9 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
memset(req, 0, sizeof(*req));
- req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1));
- req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
- req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL);
+ req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
+ req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
+ req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
req->le.filter = (__force __be32) filter;
req->le.lport = lport;
req->le.pport = rport;
@@ -3540,16 +3553,16 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
req->tcb.rcv_nxt = htonl(rcv_isn + 1);
req->tcb.rcv_adv = htons(window);
req->tcb.t_state_to_astid =
- htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) |
- V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) |
- V_FW_OFLD_CONNECTION_WR_ASTID(
+ htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
+ FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
+ FW_OFLD_CONNECTION_WR_ASTID_V(
GET_PASS_OPEN_TID(ntohl(cpl->tos_stid))));
/*
* We store the qid in opt2 which will be used by the firmware
* to send us the wr response.
*/
- req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid));
+ req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
/*
* We initialize the MSS index in TCB to 0xF.
@@ -3557,7 +3570,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
* TCB picks up the correct value. If this was 0
* TP will ignore any value > 0 for MSS index.
*/
- req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF));
+ req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
req->cookie = (unsigned long)skb;
set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
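
[Note: the IPv4/IPv6-aware MTU handling in this file reduces to one header-size calculation shared by set_emss() and best_mtu(): the IP header (v4 or v6) plus the TCP header, plus the TCP timestamp option rounded up to a 4-byte boundary when timestamps are in use. A standalone sketch of that calculation; the helper name tcp_ofld_hdr_size is illustrative, not part of the patch:]

/* Sketch: per-segment header overhead assumed by the cxgb4 CM code. */
static unsigned short tcp_ofld_hdr_size(int ipv6, int use_ts)
{
	unsigned short hdrs = (ipv6 ? sizeof(struct ipv6hdr) :
				      sizeof(struct iphdr)) +
			      sizeof(struct tcphdr);

	if (use_ts)
		hdrs += round_up(TCPOLEN_TIMESTAMP, 4);	/* 10 -> 12 bytes */
	return hdrs;
}
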
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 0f773e78e080..e9fd3a029296 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -51,9 +51,9 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
memset(res_wr, 0, wr_len);
res_wr->op_nres = cpu_to_be32(
- FW_WR_OP(FW_RI_RES_WR) |
+ FW_WR_OP_V(FW_RI_RES_WR) |
V_FW_RI_RES_WR_NRES(1) |
- FW_WR_COMPL(1));
+ FW_WR_COMPL_F);
res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
res_wr->cookie = (unsigned long) &wr_wait;
res = res_wr->res;
@@ -121,9 +121,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
memset(res_wr, 0, wr_len);
res_wr->op_nres = cpu_to_be32(
- FW_WR_OP(FW_RI_RES_WR) |
+ FW_WR_OP_V(FW_RI_RES_WR) |
V_FW_RI_RES_WR_NRES(1) |
- FW_WR_COMPL(1));
+ FW_WR_COMPL_F);
res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
res_wr->cookie = (unsigned long) &wr_wait;
res = res_wr->res;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index f25df5276c22..eb5df4e62703 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -60,7 +60,7 @@ int c4iw_wr_log = 0;
module_param(c4iw_wr_log, int, 0444);
MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
-int c4iw_wr_log_size_order = 12;
+static int c4iw_wr_log_size_order = 12;
module_param(c4iw_wr_log_size_order, int, 0444);
MODULE_PARM_DESC(c4iw_wr_log_size_order,
"Number of entries (log2) in the work request timing log.");
@@ -670,7 +670,7 @@ static int ep_open(struct inode *inode, struct file *file)
idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
spin_unlock_irq(&epd->devp->lock);
- epd->bufsize = count * 160;
+ epd->bufsize = count * 240;
epd->buf = vmalloc(epd->bufsize);
if (!epd->buf) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index ec7a2988a703..cb43c2299ac0 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -50,6 +50,13 @@ static int inline_threshold = C4IW_INLINE_THRESHOLD;
module_param(inline_threshold, int, 0644);
MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)");
+static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
+{
+ return (is_t4(dev->rdev.lldi.adapter_type) ||
+ is_t5(dev->rdev.lldi.adapter_type)) &&
+ length >= 8*1024*1024*1024ULL;
+}
+
static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
u32 len, dma_addr_t data, int wait)
{
@@ -74,18 +81,18 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
memset(req, 0, wr_len);
INIT_ULPTX_WR(req, wr_len, 0, 0);
- req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
- (wait ? FW_WR_COMPL(1) : 0));
+ req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
+ (wait ? FW_WR_COMPL_F : 0));
req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
- req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
- req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
+ req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16)));
+ req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE));
req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1));
- req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(len>>5));
+ req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5));
req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16));
- req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr));
+ req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr));
sgl = (struct ulptx_sgl *)(req + 1);
- sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_DSGL) |
+ sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
ULPTX_NSGE(1));
sgl->len0 = cpu_to_be32(len);
sgl->addr0 = cpu_to_be64(data);
@@ -107,12 +114,12 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
u8 wr_len, *to_dp, *from_dp;
int copy_len, num_wqe, i, ret = 0;
struct c4iw_wr_wait wr_wait;
- __be32 cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
+ __be32 cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE));
if (is_t4(rdev->lldi.adapter_type))
- cmd |= cpu_to_be32(ULP_MEMIO_ORDER(1));
+ cmd |= cpu_to_be32(ULP_MEMIO_ORDER_F);
else
- cmd |= cpu_to_be32(V_T5_ULP_MEMIO_IMM(1));
+ cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F);
addr &= 0x7FFFFFF;
PDBG("%s addr 0x%x len %u\n", __func__, addr, len);
@@ -135,23 +142,23 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
INIT_ULPTX_WR(req, wr_len, 0, 0);
if (i == (num_wqe-1)) {
- req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
- FW_WR_COMPL(1));
+ req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
+ FW_WR_COMPL_F);
req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait;
} else
- req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR));
+ req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
req->wr.wr_mid = cpu_to_be32(
- FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
+ FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16)));
req->cmd = cmd;
- req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(
+ req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(
DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO)));
req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr),
16));
- req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr + i * 3));
+ req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr + i * 3));
sc = (struct ulptx_idata *)(req + 1);
- sc->cmd_more = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_IMM));
+ sc->cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM));
sc->len = cpu_to_be32(roundup(copy_len, T4_ULPTX_MIN_IO));
to_dp = (u8 *)(sc + 1);
@@ -369,9 +376,11 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
int ret;
ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
- FW_RI_STAG_NSMR, mhp->attr.perms,
+ FW_RI_STAG_NSMR, mhp->attr.len ?
+ mhp->attr.perms : 0,
mhp->attr.mw_bind_enable, mhp->attr.zbva,
- mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+ mhp->attr.va_fbo, mhp->attr.len ?
+ mhp->attr.len : -1, shift - 12,
mhp->attr.pbl_size, mhp->attr.pbl_addr);
if (ret)
return ret;
@@ -536,6 +545,11 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
return ret;
}
+ if (mr_exceeds_hw_limits(rhp, total_size)) {
+ kfree(page_list);
+ return -EINVAL;
+ }
+
ret = reregister_mem(rhp, php, &mh, shift, npages);
kfree(page_list);
if (ret)
@@ -596,6 +610,12 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
if (ret)
goto err;
+ if (mr_exceeds_hw_limits(rhp, total_size)) {
+ kfree(page_list);
+ ret = -EINVAL;
+ goto err;
+ }
+
ret = alloc_pbl(mhp, npages);
if (ret) {
kfree(page_list);
@@ -699,6 +719,10 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
php = to_c4iw_pd(pd);
rhp = php->rhp;
+
+ if (mr_exceeds_hw_limits(rhp, length))
+ return ERR_PTR(-EINVAL);
+
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 72e3b69d1b76..66bd6a2ad83b 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -408,10 +408,10 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
PDBG("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%u.%u.%u.%u\n",
- FW_HDR_FW_VER_MAJOR_GET(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_MINOR_GET(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_MICRO_GET(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_BUILD_GET(c4iw_dev->rdev.lldi.fw_vers));
+ FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
}
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 41cd6882b648..bb85d479e66e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -271,9 +271,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
memset(res_wr, 0, wr_len);
res_wr->op_nres = cpu_to_be32(
- FW_WR_OP(FW_RI_RES_WR) |
+ FW_WR_OP_V(FW_RI_RES_WR) |
V_FW_RI_RES_WR_NRES(2) |
- FW_WR_COMPL(1));
+ FW_WR_COMPL_F);
res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
res_wr->cookie = (unsigned long) &wr_wait;
res = res_wr->res;
@@ -1082,10 +1082,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof *wqe);
- wqe->op_compl = cpu_to_be32(FW_WR_OP(FW_RI_INIT_WR));
+ wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
wqe->flowid_len16 = cpu_to_be32(
- FW_WR_FLOWID(qhp->ep->hwtid) |
- FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+ FW_WR_FLOWID_V(qhp->ep->hwtid) |
+ FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
@@ -1204,11 +1204,11 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof *wqe);
wqe->op_compl = cpu_to_be32(
- FW_WR_OP(FW_RI_INIT_WR) |
- FW_WR_COMPL(1));
+ FW_WR_OP_V(FW_RI_INIT_WR) |
+ FW_WR_COMPL_F);
wqe->flowid_len16 = cpu_to_be32(
- FW_WR_FLOWID(ep->hwtid) |
- FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+ FW_WR_FLOWID_V(ep->hwtid) |
+ FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
wqe->cookie = (unsigned long) &ep->com.wr_wait;
wqe->u.fini.type = FW_RI_TYPE_FINI;
@@ -1273,11 +1273,11 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof *wqe);
wqe->op_compl = cpu_to_be32(
- FW_WR_OP(FW_RI_INIT_WR) |
- FW_WR_COMPL(1));
+ FW_WR_OP_V(FW_RI_INIT_WR) |
+ FW_WR_COMPL_F);
wqe->flowid_len16 = cpu_to_be32(
- FW_WR_FLOWID(qhp->ep->hwtid) |
- FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+ FW_WR_FLOWID_V(qhp->ep->hwtid) |
+ FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait;
@@ -1538,9 +1538,9 @@ err:
set_state(qhp, C4IW_QP_STATE_ERROR);
free = 1;
abort = 1;
- wake_up(&qhp->wait);
BUG_ON(!ep);
flush_qp(qhp);
+ wake_up(&qhp->wait);
out:
mutex_unlock(&qhp->mutex);
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 3488e8c9fcb4..f914b30999f8 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -399,7 +399,7 @@ reg_user_mr_fallback:
pginfo.num_kpages = num_kpages;
pginfo.num_hwpages = num_hwpages;
pginfo.u.usr.region = e_mr->umem;
- pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
+ pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index e0c404bdc4a8..4977082e081f 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -82,7 +82,6 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- *dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 5e61e9bff697..c7278f6a8217 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -214,7 +214,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
- mr->mr.offset = umem->offset;
+ mr->mr.offset = ib_umem_offset(umem);
mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n;
mr->umem = umem;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 1066eec854a9..a3b70f6c4035 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -233,7 +233,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
if (err)
goto err_dbmap;
- cq->mcq.comp = mlx4_ib_cq_comp;
+ if (context)
+ cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp;
+ else
+ cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;
if (context)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bda5994ceb68..57ecc5b204f3 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1173,18 +1173,24 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
&mflow->reg_id[i]);
if (err)
- goto err_free;
+ goto err_create_flow;
i++;
}
if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]);
if (err)
- goto err_free;
+ goto err_create_flow;
+ i++;
}
return &mflow->ibflow;
+err_create_flow:
+ while (i) {
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]);
+ i--;
+ }
err_free:
kfree(mflow);
return ERR_PTR(err);
@@ -1969,8 +1975,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
dev->caps.num_ports > dev->caps.comp_pool)
return;
- eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
- dev->caps.num_ports);
+ eq_per_port = dev->caps.comp_pool / dev->caps.num_ports;
/* Init eq table */
added_eqs = 0;
@@ -2222,7 +2227,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN,
- &ibdev->steer_qpn_base);
+ &ibdev->steer_qpn_base, 0);
if (err)
goto err_counter;
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 8f9325cfc85d..c36ccbd9a644 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -223,7 +223,6 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
if (flags & IB_MR_REREG_TRANS) {
int shift;
- int err;
int n;
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 9c5150c3cb31..cf000b7ad64f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -802,16 +802,21 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
}
} else {
- /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
- * BlueFlame setup flow wrongly causes VLAN insertion. */
+ /* Raw packet QPNs may not have bits 6,7 set in their qp_num;
+ * otherwise, the WQE BlueFlame setup flow wrongly causes
+ * VLAN insertion. */
if (init_attr->qp_type == IB_QPT_RAW_PACKET)
- err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn,
+ (init_attr->cap.max_send_wr ?
+ MLX4_RESERVE_ETH_BF_QP : 0) |
+ (init_attr->cap.max_recv_wr ?
+ MLX4_RESERVE_A0_QP : 0));
else
if (qp->flags & MLX4_IB_QP_NETIF)
err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
else
err = mlx4_qp_reserve_range(dev->dev, 1, 1,
- &qpn);
+ &qpn, 0);
if (err)
goto err_proxy;
}
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 4ea0135af484..27a70159e2ea 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
+mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index e4056279166d..c463e7bba5f4 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -752,7 +752,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
return ERR_PTR(-EINVAL);
entries = roundup_pow_of_two(entries + 1);
- if (entries > dev->mdev->caps.max_cqes)
+ if (entries > dev->mdev->caps.gen.max_cqes)
return ERR_PTR(-EINVAL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -805,14 +805,14 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
}
- mlx5_vfree(cqb);
+ kvfree(cqb);
return &cq->ibcq;
err_cmd:
mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
err_cqb:
- mlx5_vfree(cqb);
+ kvfree(cqb);
if (context)
destroy_cq_user(cq, context);
else
@@ -919,7 +919,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
int err;
u32 fsel;
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
+ if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
return -ENOSYS;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -1074,7 +1074,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
int uninitialized_var(cqe_size);
unsigned long flags;
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
+ if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
pr_info("Firmware does not support resize CQ\n");
return -ENOSYS;
}
@@ -1083,7 +1083,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
- if (entries > dev->mdev->caps.max_cqes + 1)
+ if (entries > dev->mdev->caps.gen.max_cqes + 1)
return -EINVAL;
if (entries == ibcq->cqe + 1)
@@ -1159,11 +1159,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
}
mutex_unlock(&cq->resize_mutex);
- mlx5_vfree(in);
+ kvfree(in);
return 0;
ex_alloc:
- mlx5_vfree(in);
+ kvfree(in);
ex_resize:
if (udata)
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index b514bbb5610f..657af9a1167c 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
packet_error = be16_to_cpu(out_mad->status);
- dev->mdev->caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
+ dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ?
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
out:
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d8907b20522a..8a87404e9c76 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -157,11 +157,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ struct mlx5_general_caps *gen;
int err = -ENOMEM;
int max_rq_sg;
int max_sq_sg;
u64 flags;
+ gen = &dev->mdev->caps.gen;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad)
@@ -183,7 +185,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN;
- flags = dev->mdev->caps.flags;
+ flags = gen->flags;
if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -213,34 +215,41 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
- props->page_size_cap = dev->mdev->caps.min_page_sz;
- props->max_qp = 1 << dev->mdev->caps.log_max_qp;
- props->max_qp_wr = dev->mdev->caps.max_wqes;
- max_rq_sg = dev->mdev->caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
- max_sq_sg = (dev->mdev->caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
+ props->page_size_cap = gen->min_page_sz;
+ props->max_qp = 1 << gen->log_max_qp;
+ props->max_qp_wr = gen->max_wqes;
+ max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
+ max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
- props->max_cq = 1 << dev->mdev->caps.log_max_cq;
- props->max_cqe = dev->mdev->caps.max_cqes - 1;
- props->max_mr = 1 << dev->mdev->caps.log_max_mkey;
- props->max_pd = 1 << dev->mdev->caps.log_max_pd;
- props->max_qp_rd_atom = dev->mdev->caps.max_ra_req_qp;
- props->max_qp_init_rd_atom = dev->mdev->caps.max_ra_res_qp;
+ props->max_cq = 1 << gen->log_max_cq;
+ props->max_cqe = gen->max_cqes - 1;
+ props->max_mr = 1 << gen->log_max_mkey;
+ props->max_pd = 1 << gen->log_max_pd;
+ props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp;
+ props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp;
+ props->max_srq = 1 << gen->log_max_srq;
+ props->max_srq_wr = gen->max_srq_wqes - 1;
+ props->local_ca_ack_delay = gen->local_ca_ack_delay;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
- props->max_srq = 1 << dev->mdev->caps.log_max_srq;
- props->max_srq_wr = dev->mdev->caps.max_srq_wqes - 1;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
- props->local_ca_ack_delay = dev->mdev->caps.local_ca_ack_delay;
+ props->local_ca_ack_delay = gen->local_ca_ack_delay;
props->atomic_cap = IB_ATOMIC_NONE;
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
- props->max_mcast_grp = 1 << dev->mdev->caps.log_max_mcg;
- props->max_mcast_qp_attach = dev->mdev->caps.max_qp_mcg;
+ props->max_mcast_grp = 1 << gen->log_max_mcg;
+ props->max_mcast_qp_attach = gen->max_qp_mcg;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
+ props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+ props->odp_caps = dev->odp_caps;
+#endif
+
out:
kfree(in_mad);
kfree(out_mad);
@@ -254,10 +263,12 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ struct mlx5_general_caps *gen;
int ext_active_speed;
int err = -ENOMEM;
- if (port < 1 || port > dev->mdev->caps.num_ports) {
+ gen = &dev->mdev->caps.gen;
+ if (port < 1 || port > gen->num_ports) {
mlx5_ib_warn(dev, "invalid port number %d\n", port);
return -EINVAL;
}
@@ -288,8 +299,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
props->gid_tbl_len = out_mad->data[50];
- props->max_msg_sz = 1 << to_mdev(ibdev)->mdev->caps.log_max_msg;
- props->pkey_tbl_len = to_mdev(ibdev)->mdev->caps.port[port - 1].pkey_table_len;
+ props->max_msg_sz = 1 << gen->log_max_msg;
+ props->pkey_tbl_len = gen->port[port - 1].pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
@@ -316,7 +327,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == 4) {
- if (dev->mdev->caps.ext_port_cap[port - 1] &
+ if (gen->ext_port_cap[port - 1] &
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
@@ -470,6 +481,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_ib_alloc_ucontext_req_v2 req;
struct mlx5_ib_alloc_ucontext_resp resp;
struct mlx5_ib_ucontext *context;
+ struct mlx5_general_caps *gen;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
int gross_uuars;
@@ -480,6 +492,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
int i;
size_t reqlen;
+ gen = &dev->mdev->caps.gen;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
@@ -512,14 +525,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
- resp.qp_tab_size = 1 << dev->mdev->caps.log_max_qp;
- resp.bf_reg_size = dev->mdev->caps.bf_reg_size;
+ resp.qp_tab_size = 1 << gen->log_max_qp;
+ resp.bf_reg_size = gen->bf_reg_size;
resp.cache_line_size = L1_CACHE_BYTES;
- resp.max_sq_desc_sz = dev->mdev->caps.max_sq_desc_sz;
- resp.max_rq_desc_sz = dev->mdev->caps.max_rq_desc_sz;
- resp.max_send_wqebb = dev->mdev->caps.max_wqes;
- resp.max_recv_wr = dev->mdev->caps.max_wqes;
- resp.max_srq_recv_wr = dev->mdev->caps.max_srq_wqes;
+ resp.max_sq_desc_sz = gen->max_sq_desc_sz;
+ resp.max_rq_desc_sz = gen->max_rq_desc_sz;
+ resp.max_send_wqebb = gen->max_wqes;
+ resp.max_recv_wr = gen->max_wqes;
+ resp.max_srq_recv_wr = gen->max_srq_wqes;
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
@@ -561,11 +574,15 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
goto out_count;
}
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
+#endif
+
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
- resp.num_ports = dev->mdev->caps.num_ports;
+ resp.num_ports = gen->num_ports;
err = ib_copy_to_udata(udata, &resp,
sizeof(resp) - sizeof(resp.reserved));
if (err)
@@ -650,13 +667,13 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return -EINVAL;
idx = get_index(vma->vm_pgoff);
+ if (idx >= uuari->num_uars)
+ return -EINVAL;
+
pfn = uar_index2pfn(dev, uuari->uars[idx].index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
(unsigned long long)pfn);
- if (idx >= uuari->num_uars)
- return -EINVAL;
-
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot))
@@ -851,7 +868,7 @@ static ssize_t show_reg_pages(struct device *device,
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
- return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages);
+ return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -967,9 +984,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
+ struct mlx5_general_caps *gen;
int port;
- for (port = 1; port <= dev->mdev->caps.num_ports; port++)
+ gen = &dev->mdev->caps.gen;
+ for (port = 1; port <= gen->num_ports; port++)
mlx5_query_ext_port_caps(dev, port);
}
@@ -977,9 +996,11 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
{
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
+ struct mlx5_general_caps *gen;
int err = 0;
int port;
+ gen = &dev->mdev->caps.gen;
pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
goto out;
@@ -994,14 +1015,14 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
goto out;
}
- for (port = 1; port <= dev->mdev->caps.num_ports; port++) {
+ for (port = 1; port <= gen->num_ports; port++) {
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
break;
}
- dev->mdev->caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
- dev->mdev->caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
+ gen->port[port - 1].pkey_table_len = dprops->max_pkeys;
+ gen->port[port - 1].gid_table_len = pprops->gid_tbl_len;
mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
dprops->max_pkeys, pprops->gid_tbl_len);
}
@@ -1279,8 +1300,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
- dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey;
- dev->num_ports = mdev->caps.num_ports;
+ dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey;
+ dev->num_ports = mdev->caps.gen.num_ports;
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors = dev->num_comp_vectors;
dev->ib_dev.dma_device = &mdev->pdev->dev;
@@ -1310,6 +1331,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
+ dev->ib_dev.uverbs_ex_cmd_mask =
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
@@ -1355,7 +1378,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
+ mlx5_ib_internal_query_odp_caps(dev);
+
+ if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
dev->ib_dev.uverbs_cmd_mask |=
@@ -1368,16 +1393,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_eqs;
mutex_init(&dev->cap_mask_mutex);
- spin_lock_init(&dev->mr_lock);
err = create_dev_resources(&dev->devr);
if (err)
goto err_eqs;
- err = ib_register_device(&dev->ib_dev, NULL);
+ err = mlx5_ib_odp_init_one(dev);
if (err)
goto err_rsrc;
+ err = ib_register_device(&dev->ib_dev, NULL);
+ if (err)
+ goto err_odp;
+
err = create_umr_res(dev);
if (err)
goto err_dev;
@@ -1399,6 +1427,9 @@ err_umrc:
err_dev:
ib_unregister_device(&dev->ib_dev);
+err_odp:
+ mlx5_ib_odp_remove_one(dev);
+
err_rsrc:
destroy_dev_resources(&dev->devr);
@@ -1414,8 +1445,10 @@ err_dealloc:
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
- destroy_umrc_res(dev);
+
ib_unregister_device(&dev->ib_dev);
+ destroy_umrc_res(dev);
+ mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
free_comp_eqs(dev);
ib_dealloc_device(&dev->ib_dev);
@@ -1429,15 +1462,30 @@ static struct mlx5_interface mlx5_ib_interface = {
static int __init mlx5_ib_init(void)
{
+ int err;
+
if (deprecated_prof_sel != 2)
pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
- return mlx5_register_interface(&mlx5_ib_interface);
+ err = mlx5_ib_odp_init();
+ if (err)
+ return err;
+
+ err = mlx5_register_interface(&mlx5_ib_interface);
+ if (err)
+ goto clean_odp;
+
+ return err;
+
+clean_odp:
+ mlx5_ib_odp_cleanup();
+ return err;
}
static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
+ mlx5_ib_odp_cleanup();
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index a3e81444c825..b56e4c5593ee 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
/* @umem: umem object to scan
@@ -55,16 +56,28 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
u64 pfn;
struct scatterlist *sg;
int entry;
+ unsigned long page_shift = ilog2(umem->page_size);
- addr = addr >> PAGE_SHIFT;
+ /* With ODP we must always match OS page size. */
+ if (umem->odp_data) {
+ *count = ib_umem_page_count(umem);
+ *shift = PAGE_SHIFT;
+ *ncont = *count;
+ if (order)
+ *order = ilog2(roundup_pow_of_two(*count));
+
+ return;
+ }
+
+ addr = addr >> page_shift;
tmp = (unsigned long)addr;
m = find_first_bit(&tmp, sizeof(tmp));
skip = 1 << m;
mask = skip - 1;
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
- pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+ len = sg_dma_len(sg) >> page_shift;
+ pfn = sg_dma_address(sg) >> page_shift;
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
@@ -103,14 +116,43 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
*ncont = 0;
}
- *shift = PAGE_SHIFT + m;
+ *shift = page_shift + m;
*count = i;
}
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int umr)
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
- int shift = page_shift - PAGE_SHIFT;
+ u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
+
+ if (umem_dma & ODP_READ_ALLOWED_BIT)
+ mtt_entry |= MLX5_IB_MTT_READ;
+ if (umem_dma & ODP_WRITE_ALLOWED_BIT)
+ mtt_entry |= MLX5_IB_MTT_WRITE;
+
+ return mtt_entry;
+}
+#endif
+
+/*
+ * Populate the given array with bus addresses from the umem.
+ *
+ * dev - mlx5_ib device
+ * umem - umem to use to fill the pages
+ * page_shift - determines the page size used in the resulting array
+ * offset - offset into the umem to start from,
+ * only implemented for ODP umems
+ * num_pages - total number of pages to fill
+ * pas - bus addresses array to fill
+ * access_flags - access flags to set on all present pages.
+ use enum mlx5_ib_mtt_access_flags for this.
+ */
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ int page_shift, size_t offset, size_t num_pages,
+ __be64 *pas, int access_flags)
+{
+ unsigned long umem_page_shift = ilog2(umem->page_size);
+ int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
int i, k;
u64 cur = 0;
@@ -118,28 +160,49 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int len;
struct scatterlist *sg;
int entry;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ const bool odp = umem->odp_data != NULL;
+
+ if (odp) {
+ WARN_ON(shift != 0);
+ WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
+
+ for (i = 0; i < num_pages; ++i) {
+ dma_addr_t pa = umem->odp_data->dma_list[offset + i];
+
+ pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
+ }
+ return;
+ }
+#endif
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
+ len = sg_dma_len(sg) >> umem_page_shift;
base = sg_dma_address(sg);
for (k = 0; k < len; k++) {
if (!(i & mask)) {
- cur = base + (k << PAGE_SHIFT);
- if (umr)
- cur |= 3;
+ cur = base + (k << umem_page_shift);
+ cur |= access_flags;
pas[i >> shift] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
i >> shift, be64_to_cpu(pas[i >> shift]));
} else
mlx5_ib_dbg(dev, "=====> 0x%llx\n",
- base + (k << PAGE_SHIFT));
+ base + (k << umem_page_shift));
i++;
}
}
}
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ int page_shift, __be64 *pas, int access_flags)
+{
+ return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
+ ib_umem_num_pages(umem), pas,
+ access_flags);
+}
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
{
u64 page_size;
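
[Note: a hedged usage sketch of the new offset-based helper above: refilling a window of an ODP MR's translation entries would look roughly like the call below. The names mr, start_idx, npages and the pas scratch buffer are illustrative; the real caller is the UMR-driven mlx5_ib_update_mtt() path in mr.c.]

	/* Fill npages MTT entries for the ODP umem, starting at page
	 * start_idx within the umem; pages currently mapped get the
	 * read+write MTT bits, unmapped pages stay non-present. */
	__mlx5_ib_populate_pas(dev, mr->umem, PAGE_SHIFT, start_idx, npages,
			       pas, MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE);
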
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 386780f0d1e1..83f22fe297c8 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -111,6 +111,8 @@ struct mlx5_ib_pd {
*/
#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
+#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
+#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
@@ -147,6 +149,29 @@ enum {
MLX5_QP_EMPTY
};
+/*
+ * Connect-IB can trigger up to four concurrent pagefaults
+ * per-QP.
+ */
+enum mlx5_ib_pagefault_context {
+ MLX5_IB_PAGEFAULT_RESPONDER_READ,
+ MLX5_IB_PAGEFAULT_REQUESTOR_READ,
+ MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
+ MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
+ MLX5_IB_PAGEFAULT_CONTEXTS
+};
+
+static inline enum mlx5_ib_pagefault_context
+ mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
+{
+ return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
+}
+
+struct mlx5_ib_pfault {
+ struct work_struct work;
+ struct mlx5_pagefault mpfault;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
struct mlx5_core_qp mqp;
@@ -192,6 +217,21 @@ struct mlx5_ib_qp {
/* Store signature errors */
bool signature_en;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /*
+ * A flag that is true for QP's that are in a state that doesn't
+ * allow page faults, and shouldn't schedule any more faults.
+ */
+ int disable_page_faults;
+ /*
+ * The disable_page_faults_lock protects a QP's disable_page_faults
+ * field, allowing for a thread to atomically check whether the QP
+ * allows page faults, and if so schedule a page fault.
+ */
+ spinlock_t disable_page_faults_lock;
+ struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
+#endif
};
struct mlx5_ib_cq_buf {
@@ -206,6 +246,19 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
};
+struct mlx5_umr_wr {
+ union {
+ u64 virt_addr;
+ u64 offset;
+ } target;
+ struct ib_pd *pd;
+ unsigned int page_shift;
+ unsigned int npages;
+ u32 length;
+ int access_flags;
+ u32 mkey;
+};
+
struct mlx5_shared_mr_info {
int mr_id;
struct ib_umem *umem;
@@ -253,6 +306,13 @@ struct mlx5_ib_xrcd {
u32 xrcdn;
};
+enum mlx5_ib_mtt_access_flags {
+ MLX5_IB_MTT_READ = (1 << 0),
+ MLX5_IB_MTT_WRITE = (1 << 1),
+};
+
+#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+
struct mlx5_ib_mr {
struct ib_mr ibmr;
struct mlx5_core_mr mmr;
@@ -261,12 +321,11 @@ struct mlx5_ib_mr {
struct list_head list;
int order;
int umred;
- __be64 *pas;
- dma_addr_t dma;
int npages;
struct mlx5_ib_dev *dev;
struct mlx5_create_mkey_mbox_out out;
struct mlx5_core_sig_ctx *sig;
+ int live;
};
struct mlx5_ib_fast_reg_page_list {
@@ -372,11 +431,18 @@ struct mlx5_ib_dev {
struct umr_common umrc;
/* sync used page count stats
*/
- spinlock_t mr_lock;
struct mlx5_ib_resources devr;
struct mlx5_mr_cache cache;
struct timer_list delay_timer;
int fill_delay;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ struct ib_odp_caps odp_caps;
+ /*
+ * Sleepable RCU that prevents destruction of MRs while they are still
+ * being used by a page fault handler.
+ */
+ struct srcu_struct mr_srcu;
+#endif
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -490,6 +556,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+ void *buffer, u32 length);
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
int vector, struct ib_ucontext *context,
struct ib_udata *udata);
@@ -502,6 +570,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
+ int npages, int zap);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
@@ -533,8 +603,11 @@ int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
int *ncont, int *order);
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ int page_shift, size_t offset, size_t num_pages,
+ __be64 *pas, int access_flags);
void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int umr);
+ int page_shift, __be64 *pas, int access_flags);
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -544,6 +617,38 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+extern struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev);
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_pfault *pfault);
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int __init mlx5_ib_odp_init(void);
+void mlx5_ib_odp_cleanup(void);
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+ unsigned long end);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline int mlx5_ib_odp_init(void) { return 0; }
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -561,4 +666,7 @@ static inline u8 convert_access(int acc)
MLX5_PERM_LOCAL_READ;
}
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 80b3c63eab5d..32a28bd50b20 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -37,21 +37,34 @@
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"
enum {
MAX_PENDING_REG_MR = 8,
};
-enum {
- MLX5_UMR_ALIGN = 2048
-};
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+ MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+ __aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
+
+static int clean_mr(struct mlx5_ib_mr *mr);
-static __be64 *mr_align(__be64 *ptr, int align)
+static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- unsigned long mask = align - 1;
+ int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
- return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /* Wait until all page fault handlers using the mr complete. */
+ synchronize_srcu(&dev->mr_srcu);
+#endif
+
+ return err;
}
static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -146,7 +159,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
mr->order = ent->order;
mr->umred = 1;
mr->dev = dev;
- in->seg.status = 1 << 6;
+ in->seg.status = MLX5_MKEY_STATUS_FREE;
in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
@@ -159,6 +172,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
sizeof(*in), reg_mr_callback,
mr, &mr->out);
if (err) {
+ spin_lock_irq(&ent->lock);
+ ent->pending--;
+ spin_unlock_irq(&ent->lock);
mlx5_ib_warn(dev, "create mkey failed %d\n", err);
kfree(mr);
break;
@@ -188,7 +204,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
ent->cur--;
ent->size--;
spin_unlock_irq(&ent->lock);
- err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+ err = destroy_mkey(dev, mr);
if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
else
@@ -479,7 +495,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
ent->cur--;
ent->size--;
spin_unlock_irq(&ent->lock);
- err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+ err = destroy_mkey(dev, mr);
if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
else
@@ -665,7 +681,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
static int use_umr(int order)
{
- return order <= 17;
+ return order <= MLX5_MAX_UMR_SHIFT;
}
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@@ -675,6 +691,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_mr *mr = dev->umrc.mr;
+ struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
sg->addr = dma;
sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -689,21 +706,24 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
wr->num_sge = 0;
wr->opcode = MLX5_IB_WR_UMR;
- wr->wr.fast_reg.page_list_len = n;
- wr->wr.fast_reg.page_shift = page_shift;
- wr->wr.fast_reg.rkey = key;
- wr->wr.fast_reg.iova_start = virt_addr;
- wr->wr.fast_reg.length = len;
- wr->wr.fast_reg.access_flags = access_flags;
- wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+
+ umrwr->npages = n;
+ umrwr->page_shift = page_shift;
+ umrwr->mkey = key;
+ umrwr->target.virt_addr = virt_addr;
+ umrwr->length = len;
+ umrwr->access_flags = access_flags;
+ umrwr->pd = pd;
}
static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
struct ib_send_wr *wr, u32 key)
{
- wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+ struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
+ wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
wr->opcode = MLX5_IB_WR_UMR;
- wr->wr.fast_reg.rkey = key;
+ umrwr->mkey = key;
}
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
@@ -739,7 +759,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
struct ib_send_wr wr, *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
- int size = sizeof(u64) * npages;
+ int size;
+ __be64 *mr_pas;
+ __be64 *pas;
+ dma_addr_t dma;
int err = 0;
int i;
@@ -758,25 +781,31 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (!mr)
return ERR_PTR(-EAGAIN);
- mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
- if (!mr->pas) {
+ /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+ * To avoid copying garbage after the pas array, we allocate
+ * a little more. */
+ size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
+ mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+ if (!mr_pas) {
err = -ENOMEM;
goto free_mr;
}
- mlx5_ib_populate_pas(dev, umem, page_shift,
- mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+ pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
+ mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+ /* Clear padding after the actual pages. */
+ memset(pas + npages, 0, size - npages * sizeof(u64));
- mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, mr->dma)) {
+ dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
err = -ENOMEM;
goto free_pas;
}
memset(&wr, 0, sizeof(wr));
wr.wr_id = (u64)(unsigned long)&umr_context;
- prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+ prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
+ virt_addr, len, access_flags);
mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
@@ -796,12 +825,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
mr->mmr.size = len;
mr->mmr.pd = to_mpd(pd)->pdn;
+ mr->live = 1;
+
unmap_dma:
up(&umrc->sem);
- dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+ dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
free_pas:
- kfree(mr->pas);
+ kfree(mr_pas);
free_mr:
if (err) {
@@ -812,6 +843,128 @@ free_mr:
return mr;
}
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+ int zap)
+{
+ struct mlx5_ib_dev *dev = mr->dev;
+ struct device *ddev = dev->ib_dev.dma_device;
+ struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
+ struct ib_umem *umem = mr->umem;
+ int size;
+ __be64 *pas;
+ dma_addr_t dma;
+ struct ib_send_wr wr, *bad;
+ struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+ struct ib_sge sg;
+ int err = 0;
+ const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+ const int page_index_mask = page_index_alignment - 1;
+ size_t pages_mapped = 0;
+ size_t pages_to_map = 0;
+ size_t pages_iter = 0;
+ int use_emergency_buf = 0;
+
+ /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+ * so we need to align the offset and length accordingly */
+ if (start_page_index & page_index_mask) {
+ npages += start_page_index & page_index_mask;
+ start_page_index &= ~page_index_mask;
+ }
+
+ pages_to_map = ALIGN(npages, page_index_alignment);
+
+ if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+ return -EINVAL;
+
+ size = sizeof(u64) * pages_to_map;
+ size = min_t(int, PAGE_SIZE, size);
+ /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
+ * code when we are called from an invalidation. The pas buffer must
+ * be 2k-aligned for Connect-IB. */
+ pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+ if (!pas) {
+ mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+ pas = mlx5_ib_update_mtt_emergency_buffer;
+ size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+ use_emergency_buf = 1;
+ mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+ memset(pas, 0, size);
+ }
+ pages_iter = size / sizeof(u64);
+ dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+ err = -ENOMEM;
+ goto free_pas;
+ }
+
+ for (pages_mapped = 0;
+ pages_mapped < pages_to_map && !err;
+ pages_mapped += pages_iter, start_page_index += pages_iter) {
+ dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+ npages = min_t(size_t,
+ pages_iter,
+ ib_umem_num_pages(umem) - start_page_index);
+
+ if (!zap) {
+ __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+ start_page_index, npages, pas,
+ MLX5_IB_MTT_PRESENT);
+ /* Clear padding after the pages brought from the
+ * umem. */
+ memset(pas + npages, 0, size - npages * sizeof(u64));
+ }
+
+ dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+ memset(&wr, 0, sizeof(wr));
+ wr.wr_id = (u64)(unsigned long)&umr_context;
+
+ sg.addr = dma;
+ sg.length = ALIGN(npages * sizeof(u64),
+ MLX5_UMR_MTT_ALIGNMENT);
+ sg.lkey = dev->umrc.mr->lkey;
+
+ wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+ MLX5_IB_SEND_UMR_UPDATE_MTT;
+ wr.sg_list = &sg;
+ wr.num_sge = 1;
+ wr.opcode = MLX5_IB_WR_UMR;
+ umrwr->npages = sg.length / sizeof(u64);
+ umrwr->page_shift = PAGE_SHIFT;
+ umrwr->mkey = mr->mmr.key;
+ umrwr->target.offset = start_page_index;
+
+ mlx5_ib_init_umr_context(&umr_context);
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, &wr, &bad);
+ if (err) {
+ mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+ umr_context.status);
+ err = -EFAULT;
+ }
+ }
+ up(&umrc->sem);
+ }
+ dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+ if (!use_emergency_buf)
+ free_page((unsigned long)pas);
+ else
+ mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+ return err;
+}
+#endif
+
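
mlx5_ib_update_mtt() above first widens the caller's range so that both the starting page index and the count fall on MLX5_UMR_MTT_ALIGNMENT/sizeof(u64) boundaries, then posts one UMR per pas-buffer-sized chunk. A minimal user-space sketch of just that index arithmetic; the alignment and buffer sizes below are illustrative stand-ins, not the driver's constants.

#include <stdio.h>
#include <stddef.h>

#define ALIGN_ENTRIES	8	/* stand-in for MTT alignment / sizeof(u64) */
#define BUF_ENTRIES	512	/* stand-in for the pas buffer capacity     */

static void plan_update(size_t start, size_t npages)
{
	const size_t mask = ALIGN_ENTRIES - 1;
	size_t done, chunk;

	/* Widen so both the start index and the count are aligned. */
	if (start & mask) {
		npages += start & mask;
		start &= ~mask;
	}
	npages = (npages + mask) & ~mask;

	/* One UMR post per buffer-sized chunk. */
	for (done = 0; done < npages; done += chunk, start += chunk) {
		chunk = npages - done;
		if (chunk > BUF_ENTRIES)
			chunk = BUF_ENTRIES;
		printf("UMR round: page indices [%zu, %zu)\n",
		       start, start + chunk);
	}
}

int main(void)
{
	plan_update(70, 1000);	/* unaligned start and length */
	return 0;
}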
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
u64 length, struct ib_umem *umem,
int npages, int page_shift,
@@ -822,6 +975,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
struct mlx5_ib_mr *mr;
int inlen;
int err;
+ bool pg_cap = !!(dev->mdev->caps.gen.flags &
+ MLX5_DEV_CAP_FLAG_ON_DMND_PG);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
@@ -833,8 +988,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
err = -ENOMEM;
goto err_1;
}
- mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+ mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+ pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+ * in the page list submitted with the command. */
+ in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
in->seg.flags = convert_access(access_flags) |
MLX5_ACCESS_MODE_MTT;
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -853,14 +1012,15 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
goto err_2;
}
mr->umem = umem;
- mlx5_vfree(in);
+ mr->live = 1;
+ kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
return mr;
err_2:
- mlx5_vfree(in);
+ kvfree(in);
err_1:
kfree(mr);
@@ -881,12 +1041,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int order;
int err;
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
- start, virt_addr, length);
+ mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, virt_addr, length, access_flags);
umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
0);
if (IS_ERR(umem)) {
- mlx5_ib_dbg(dev, "umem get failed\n");
+ mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
return (void *)umem;
}
@@ -907,6 +1067,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "cache empty for order %d", order);
mr = NULL;
}
+ } else if (access_flags & IB_ACCESS_ON_DEMAND) {
+ err = -EINVAL;
+ pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
+ goto error;
}
if (!mr)
@@ -922,16 +1086,51 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->umem = umem;
mr->npages = npages;
- spin_lock(&dev->mr_lock);
- dev->mdev->priv.reg_pages += npages;
- spin_unlock(&dev->mr_lock);
+ atomic_add(npages, &dev->mdev->priv.reg_pages);
mr->ibmr.lkey = mr->mmr.key;
mr->ibmr.rkey = mr->mmr.key;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (umem->odp_data) {
+ /*
+ * This barrier prevents the compiler from moving the
+ * setting of umem->odp_data->private to point to our
+ * MR before reg_umr has finished, to ensure that the MR
+ * initialization has finished before starting to
+ * handle invalidations.
+ */
+ smp_wmb();
+ mr->umem->odp_data->private = mr;
+ /*
+ * Make sure we will see the new
+ * umem->odp_data->private value in the invalidation
+ * routines, before we can get page faults on the
+ * MR. Page faults can happen once we put the MR in
+ * the tree, below this line. Without the barrier,
+ * there can be a fault handling and an invalidation
+ * before umem->odp_data->private == mr is visible to
+ * the invalidation handler.
+ */
+ smp_wmb();
+ }
+#endif
+
return &mr->ibmr;
error:
+ /*
+ * Destroy the umem *before* destroying the MR, to ensure we
+ * will not have any in-flight notifiers when destroying the
+ * MR.
+ *
+ * As the MR is completely invalid to begin with, and this
+ * error path is only taken if we can't push the mr entry into
+ * the pagefault tree, this is safe.
+ */
+
ib_umem_release(umem);
+ /* Kill the MR, and return an error code. */
+ clean_mr(mr);
return ERR_PTR(err);
}
@@ -968,17 +1167,14 @@ error:
return err;
}
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
- struct mlx5_ib_mr *mr = to_mmr(ibmr);
- struct ib_umem *umem = mr->umem;
- int npages = mr->npages;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
int umred = mr->umred;
int err;
if (!umred) {
- err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+ err = destroy_mkey(dev, mr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
mr->mmr.key, err);
@@ -993,15 +1189,47 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
free_cached_mr(dev, mr);
}
- if (umem) {
+ if (!umred)
+ kfree(mr);
+
+ return 0;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ int npages = mr->npages;
+ struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (umem && umem->odp_data) {
+ /* Prevent new page faults from succeeding */
+ mr->live = 0;
+ /* Wait for all running page-fault handlers to finish. */
+ synchronize_srcu(&dev->mr_srcu);
+ /* Destroy all page mappings */
+ mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+ ib_umem_end(umem));
+ /*
+ * We kill the umem before the MR for ODP,
+ * so that there will not be any invalidations in
+ * flight, looking at the *mr struct.
+ */
ib_umem_release(umem);
- spin_lock(&dev->mr_lock);
- dev->mdev->priv.reg_pages -= npages;
- spin_unlock(&dev->mr_lock);
+ atomic_sub(npages, &dev->mdev->priv.reg_pages);
+
+ /* Avoid double-freeing the umem. */
+ umem = NULL;
}
+#endif
- if (!umred)
- kfree(mr);
+ clean_mr(mr);
+
+ if (umem) {
+ ib_umem_release(umem);
+ atomic_sub(npages, &dev->mdev->priv.reg_pages);
+ }
return 0;
}
@@ -1025,7 +1253,7 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
goto err_free;
}
- in->seg.status = 1 << 6; /* free */
+ in->seg.status = MLX5_MKEY_STATUS_FREE;
in->seg.xlt_oct_size = cpu_to_be32(ndescs);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -1110,7 +1338,7 @@ int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
kfree(mr->sig);
}
- err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+ err = destroy_mkey(dev, mr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
mr->mmr.key, err);
@@ -1140,7 +1368,7 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
goto err_free;
}
- in->seg.status = 1 << 6; /* free */
+ in->seg.status = MLX5_MKEY_STATUS_FREE;
in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
new file mode 100644
index 000000000000..a2c541c4809a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -0,0 +1,798 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+
+#include "mlx5_ib.h"
+
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
+/* Timeout in ms to wait for an active mmu notifier to complete when handling
+ * a pagefault. */
+#define MMU_NOTIFIER_TIMEOUT 1000
+
+struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+ unsigned long end)
+{
+ struct mlx5_ib_mr *mr;
+ const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1;
+ u64 idx = 0, blk_start_idx = 0;
+ int in_block = 0;
+ u64 addr;
+
+ if (!umem || !umem->odp_data) {
+ pr_err("invalidation called on NULL umem or non-ODP umem\n");
+ return;
+ }
+
+ mr = umem->odp_data->private;
+
+ if (!mr || !mr->ibmr.pd)
+ return;
+
+ start = max_t(u64, ib_umem_start(umem), start);
+ end = min_t(u64, ib_umem_end(umem), end);
+
+ /*
+ * Iteration one - zap the HW's MTTs. The notifiers_count ensures that
+ * while we are doing the invalidation, no page fault will attempt to
+ * overwrite the same MTTs. Concurrent invalidations might race us,
+ * but they will write 0s as well, so no difference in the end result.
+ */
+
+ for (addr = start; addr < end; addr += (u64)umem->page_size) {
+ idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ /*
+ * Strive to write the MTTs in chunks, but avoid overwriting
+ * non-existing MTTs. The heuristic here can be improved to
+ * estimate the cost of another UMR vs. the cost of a bigger
+ * UMR.
+ */
+ if (umem->odp_data->dma_list[idx] &
+ (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
+ if (!in_block) {
+ blk_start_idx = idx;
+ in_block = 1;
+ }
+ } else {
+ u64 umr_offset = idx & umr_block_mask;
+
+ if (in_block && umr_offset == 0) {
+ mlx5_ib_update_mtt(mr, blk_start_idx,
+ idx - blk_start_idx, 1);
+ in_block = 0;
+ }
+ }
+ }
+ if (in_block)
+ mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
+ 1);
+
+ /*
+ * We are now sure that the device will not access the
+ * memory. We can safely unmap it, and mark it as dirty if
+ * needed.
+ */
+
+ ib_umem_odp_unmap_dma_pages(umem, start, end);
+}
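
The loop above batches runs of present MTT entries so that one UMR zaps a whole block, and only closes a run when it hits an absent entry on an aligned boundary. A user-space sketch of that run batching over a present/absent array; the block size and the zap helper are illustrative.

#include <stdio.h>
#include <stddef.h>

#define BLK_MASK 7	/* illustrative: close runs on 8-entry boundaries */

/* Stand-in for the zapping call to mlx5_ib_update_mtt(). */
static void zap_run(size_t first, size_t count)
{
	printf("zap MTT entries [%zu, %zu)\n", first, first + count);
}

static void invalidate(const int *present, size_t n)
{
	size_t idx, blk_start = 0;
	int in_block = 0;

	for (idx = 0; idx < n; idx++) {
		if (present[idx]) {
			if (!in_block) {
				blk_start = idx;
				in_block = 1;
			}
		} else if (in_block && (idx & BLK_MASK) == 0) {
			/* Absent entry on an aligned boundary: flush the run. */
			zap_run(blk_start, idx - blk_start);
			in_block = 0;
		}
	}
	if (in_block)
		zap_run(blk_start, n - blk_start);
}

int main(void)
{
	int present[] = { 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1 };

	invalidate(present, sizeof(present) / sizeof(present[0]));
	return 0;
}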
+
+#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \
+ if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name) \
+ ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name; \
+} while (0)
+
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+ int err;
+ struct mlx5_odp_caps hw_caps;
+ struct ib_odp_caps *caps = &dev->odp_caps;
+
+ memset(caps, 0, sizeof(*caps));
+
+ if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
+ return 0;
+
+ err = mlx5_query_odp_caps(dev->mdev, &hw_caps);
+ if (err)
+ goto out;
+
+ caps->general_caps = IB_ODP_SUPPORT;
+ COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.ud_odp_caps,
+ SEND);
+ COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+ SEND);
+ COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+ RECV);
+ COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+ WRITE);
+ COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+ READ);
+
+out:
+ return err;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
+ u32 key)
+{
+ u32 base_key = mlx5_base_mkey(key);
+ struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key);
+ struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr);
+
+ if (!mmr || mmr->key != key || !mr->live)
+ return NULL;
+
+ return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
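
The lookup above keys the MR tree on the base part of the mkey and then re-checks the full key and the live flag, since the hardware can report a key whose low-order variant bits no longer match a reused entry. A small sketch of that split, assuming (purely for illustration) that the variant occupies the low 8 bits:

#include <stdio.h>
#include <stdint.h>

/* Assumed split: low 8 bits are a rotating "variant", the rest
 * identifies the mkey entry used for the tree lookup. */
static uint32_t base_mkey(uint32_t key)
{
	return key & 0xffffff00u;
}

int main(void)
{
	uint32_t stored = 0x00abcd01;	/* key recorded when the MR was created */
	uint32_t faulted = 0x00abcd02;	/* key reported by a (stale) page fault */

	printf("same base entry: %d, exact key match: %d\n",
	       base_mkey(stored) == base_mkey(faulted), stored == faulted);
	return 0;
}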
+
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_pfault *pfault,
+ int error)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+ int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+ pfault->mpfault.flags,
+ error);
+ if (ret)
+ pr_err("Failed to resolve the page fault on QP 0x%x\n",
+ qp->mqp.qpn);
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE.
+ *
+ * Returns number of pages retrieved on success. The caller will continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ * page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ * abort the page fault handling and possibly move the QP to an error state.
+ * On other errors the QP should also be closed with an error.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_pfault *pfault,
+ u32 key, u64 io_virt, size_t bcnt,
+ u32 *bytes_mapped)
+{
+ struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
+ int srcu_key;
+ unsigned int current_seq;
+ u64 start_idx;
+ int npages = 0, ret = 0;
+ struct mlx5_ib_mr *mr;
+ u64 access_mask = ODP_READ_ALLOWED_BIT;
+
+ srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
+ mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+ /*
+ * If we didn't find the MR, it means the MR was closed while we were
+ * handling the ODP event. In this case we return -EFAULT so that the
+ * QP will be closed.
+ */
+ if (!mr || !mr->ibmr.pd) {
+ pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+ key);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+ if (!mr->umem->odp_data) {
+ pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped +=
+ (bcnt - pfault->mpfault.bytes_committed);
+ goto srcu_unlock;
+ }
+ if (mr->ibmr.pd != qp->ibqp.pd) {
+ pr_err("Page-fault with different PDs for QP and MR.\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
+ /*
+ * Ensure the sequence number is valid for some time before we call
+ * gup.
+ */
+ smp_rmb();
+
+ /*
+ * Avoid branches - this code will perform correctly
+ * in all iterations (in iteration 2 and above,
+ * bytes_committed == 0).
+ */
+ io_virt += pfault->mpfault.bytes_committed;
+ bcnt -= pfault->mpfault.bytes_committed;
+
+ start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT;
+
+ if (mr->umem->writable)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+ npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
+ access_mask, current_seq);
+ if (npages < 0) {
+ ret = npages;
+ goto srcu_unlock;
+ }
+
+ if (npages > 0) {
+ mutex_lock(&mr->umem->odp_data->umem_mutex);
+ if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ /*
+ * No need to check whether the MTTs really belong to
+ * this MR, since ib_umem_odp_map_dma_pages already
+ * checks this.
+ */
+ ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
+ } else {
+ ret = -EAGAIN;
+ }
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ pr_err("Failed to update mkey page tables\n");
+ goto srcu_unlock;
+ }
+
+ if (bytes_mapped) {
+ u32 new_mappings = npages * PAGE_SIZE -
+ (io_virt - round_down(io_virt, PAGE_SIZE));
+ *bytes_mapped += min_t(u32, new_mappings, bcnt);
+ }
+ }
+
+srcu_unlock:
+ if (ret == -EAGAIN) {
+ if (!mr->umem->odp_data->dying) {
+ struct ib_umem_odp *odp_data = mr->umem->odp_data;
+ unsigned long timeout =
+ msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+
+ if (!wait_for_completion_timeout(
+ &odp_data->notifier_completion,
+ timeout)) {
+ pr_warn("timeout waiting for mmu notifier completion\n");
+ }
+ } else {
+ /* The MR is being killed, kill the QP as well. */
+ ret = -EFAULT;
+ }
+ }
+ srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+ pfault->mpfault.bytes_committed = 0;
+ return ret ? ret : npages;
+}
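
Once ib_umem_odp_map_dma_pages() reports how many pages it pinned, the handler above converts that back into bytes useful to the faulting segment: the span of the mapped pages minus io_virt's offset into the first page, capped at the segment length. A small worked sketch with an assumed 4 KiB page size:

#include <stdio.h>
#include <stdint.h>

#define PG_SIZE 4096u	/* assumed page size for the example */

/* Bytes of a 'bcnt'-byte segment starting at 'io_virt' that are
 * covered by 'npages' pages mapped from the segment's first page. */
static uint32_t new_mappings(uint64_t io_virt, uint32_t bcnt, uint32_t npages)
{
	uint32_t head = io_virt & (PG_SIZE - 1);	/* offset in first page */
	uint32_t covered = npages * PG_SIZE - head;

	return covered < bcnt ? covered : bcnt;
}

int main(void)
{
	/* 10000 bytes starting 100 bytes into a page: three pages cover
	 * 3 * 4096 - 100 = 12188 bytes, so the whole segment is mapped. */
	printf("%u\n", new_mappings(0x1000 + 100, 10000, 3));
	return 0;
}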
+
+/**
+ * pagefault_data_segments() - Parse a series of data segments for page fault handling.
+ *
+ * @qp: the QP on which the fault occurred.
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ * map. This allows the caller to decide intelligently whether
+ * enough memory was mapped to resolve the page fault
+ * successfully (e.g. enough for the next MTU, or the entire
+ * WQE).
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ * the committed bytes).
+ *
+ * Returns the number of pages loaded if positive, zero for an empty WQE, or a
+ * negative error code.
+ */
+static int pagefault_data_segments(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_pfault *pfault, void *wqe,
+ void *wqe_end, u32 *bytes_mapped,
+ u32 *total_wqe_bytes, int receive_queue)
+{
+ int ret = 0, npages = 0;
+ u64 io_virt;
+ u32 key;
+ u32 byte_count;
+ size_t bcnt;
+ int inline_segment;
+
+ /* Skip SRQ next-WQE segment. */
+ if (receive_queue && qp->ibqp.srq)
+ wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+ if (bytes_mapped)
+ *bytes_mapped = 0;
+ if (total_wqe_bytes)
+ *total_wqe_bytes = 0;
+
+ while (wqe < wqe_end) {
+ struct mlx5_wqe_data_seg *dseg = wqe;
+
+ io_virt = be64_to_cpu(dseg->addr);
+ key = be32_to_cpu(dseg->lkey);
+ byte_count = be32_to_cpu(dseg->byte_count);
+ inline_segment = !!(byte_count & MLX5_INLINE_SEG);
+ bcnt = byte_count & ~MLX5_INLINE_SEG;
+
+ if (inline_segment) {
+ bcnt = bcnt & MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK;
+ wqe += ALIGN(sizeof(struct mlx5_wqe_inline_seg) + bcnt,
+ 16);
+ } else {
+ wqe += sizeof(*dseg);
+ }
+
+ /* receive WQE end of sg list. */
+ if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
+ io_virt == 0)
+ break;
+
+ if (!inline_segment && total_wqe_bytes) {
+ *total_wqe_bytes += bcnt - min_t(size_t, bcnt,
+ pfault->mpfault.bytes_committed);
+ }
+
+ /* A zero length data segment designates a length of 2GB. */
+ if (bcnt == 0)
+ bcnt = 1U << 31;
+
+ if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
+ pfault->mpfault.bytes_committed -=
+ min_t(size_t, bcnt,
+ pfault->mpfault.bytes_committed);
+ continue;
+ }
+
+ ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
+ bcnt, bytes_mapped);
+ if (ret < 0)
+ break;
+ npages += ret;
+ }
+
+ return ret < 0 ? ret : npages;
+}
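
Each scatter entry's byte_count carries an inline flag in its top bit, and a zero scatter length stands for 2 GB. A sketch of just that decoding; the flag bit and the inline byte-count mask are assumed values for illustration, not the driver's MLX5_* constants.

#include <stdio.h>
#include <stdint.h>

/* Assumed encodings, for illustration only. */
#define INLINE_SEG		0x80000000u
#define INLINE_BYTE_COUNT_MASK	0x3ffu

static void decode(uint32_t byte_count)
{
	int inline_seg = !!(byte_count & INLINE_SEG);
	uint64_t bcnt = byte_count & ~INLINE_SEG;

	if (inline_seg)
		bcnt &= INLINE_BYTE_COUNT_MASK;
	if (bcnt == 0)
		bcnt = 1ULL << 31;	/* zero length designates 2GB */

	printf("byte_count=0x%08x inline=%d bcnt=%llu\n",
	       byte_count, inline_seg, (unsigned long long)bcnt);
}

int main(void)
{
	decode(0x80000040u);	/* inline segment, 64 bytes   */
	decode(0x00001000u);	/* scatter entry, 4096 bytes  */
	decode(0x00000000u);	/* scatter entry, zero => 2GB */
	return 0;
}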
+
+/*
+ * Parse initiator WQE. Advances the wqe pointer to point at the
+ * scatter-gather list, and set wqe_end to the end of the WQE.
+ */
+static int mlx5_ib_mr_initiator_pfault_handler(
+ struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+ void **wqe, void **wqe_end, int wqe_length)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+ struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
+ u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+ unsigned ds, opcode;
+#if defined(DEBUG)
+ u32 ctrl_wqe_index, ctrl_qpn;
+#endif
+
+ ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+ if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
+ mlx5_ib_err(dev, "Unable to read the complete WQE. ds = 0x%x, ret = 0x%x\n",
+ ds, wqe_length);
+ return -EFAULT;
+ }
+
+ if (ds == 0) {
+ mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
+ wqe_index, qp->mqp.qpn);
+ return -EFAULT;
+ }
+
+#if defined(DEBUG)
+ ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
+ MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
+ MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
+ if (wqe_index != ctrl_wqe_index) {
+ mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
+ wqe_index, qp->mqp.qpn,
+ ctrl_wqe_index);
+ return -EFAULT;
+ }
+
+ ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
+ MLX5_WQE_CTRL_QPN_SHIFT;
+ if (qp->mqp.qpn != ctrl_qpn) {
+ mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
+ wqe_index, qp->mqp.qpn,
+ ctrl_qpn);
+ return -EFAULT;
+ }
+#endif /* DEBUG */
+
+ *wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
+ *wqe += sizeof(*ctrl);
+
+ opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
+ MLX5_WQE_CTRL_OPCODE_MASK;
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ switch (opcode) {
+ case MLX5_OPCODE_SEND:
+ case MLX5_OPCODE_SEND_IMM:
+ case MLX5_OPCODE_SEND_INVAL:
+ if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+ IB_ODP_SUPPORT_SEND))
+ goto invalid_transport_or_opcode;
+ break;
+ case MLX5_OPCODE_RDMA_WRITE:
+ case MLX5_OPCODE_RDMA_WRITE_IMM:
+ if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+ IB_ODP_SUPPORT_WRITE))
+ goto invalid_transport_or_opcode;
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+ break;
+ case MLX5_OPCODE_RDMA_READ:
+ if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+ IB_ODP_SUPPORT_READ))
+ goto invalid_transport_or_opcode;
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+ break;
+ default:
+ goto invalid_transport_or_opcode;
+ }
+ break;
+ case IB_QPT_UD:
+ switch (opcode) {
+ case MLX5_OPCODE_SEND:
+ case MLX5_OPCODE_SEND_IMM:
+ if (!(dev->odp_caps.per_transport_caps.ud_odp_caps &
+ IB_ODP_SUPPORT_SEND))
+ goto invalid_transport_or_opcode;
+ *wqe += sizeof(struct mlx5_wqe_datagram_seg);
+ break;
+ default:
+ goto invalid_transport_or_opcode;
+ }
+ break;
+ default:
+invalid_transport_or_opcode:
+ mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n",
+ qp->ibqp.qp_type, opcode);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse responder WQE. Advances the wqe pointer to point at the
+ * scatter-gather list, and set wqe_end to the end of the WQE.
+ */
+static int mlx5_ib_mr_responder_pfault_handler(
+ struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+ void **wqe, void **wqe_end, int wqe_length)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+ struct mlx5_ib_wq *wq = &qp->rq;
+ int wqe_size = 1 << wq->wqe_shift;
+
+ if (qp->ibqp.srq) {
+ mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+ return -EFAULT;
+ }
+
+ if (qp->wq_sig) {
+ mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
+ return -EFAULT;
+ }
+
+ if (wqe_size > wqe_length) {
+ mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
+ return -EFAULT;
+ }
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+ IB_ODP_SUPPORT_RECV))
+ goto invalid_transport_or_opcode;
+ break;
+ default:
+invalid_transport_or_opcode:
+ mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
+ qp->ibqp.qp_type);
+ return -EFAULT;
+ }
+
+ *wqe_end = *wqe + wqe_size;
+
+ return 0;
+}
+
+static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_pfault *pfault)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+ int ret;
+ void *wqe, *wqe_end;
+ u32 bytes_mapped, total_wqe_bytes;
+ char *buffer = NULL;
+ int resume_with_error = 0;
+ u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+ int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+
+ buffer = (char *)__get_free_page(GFP_KERNEL);
+ if (!buffer) {
+ mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
+ resume_with_error = 1;
+ goto resolve_page_fault;
+ }
+
+ ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
+ PAGE_SIZE);
+ if (ret < 0) {
+ mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
+ -ret, wqe_index, qp->mqp.qpn);
+ resume_with_error = 1;
+ goto resolve_page_fault;
+ }
+
+ wqe = buffer;
+ if (requestor)
+ ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
+ &wqe_end, ret);
+ else
+ ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
+ &wqe_end, ret);
+ if (ret < 0) {
+ resume_with_error = 1;
+ goto resolve_page_fault;
+ }
+
+ if (wqe >= wqe_end) {
+ mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
+ resume_with_error = 1;
+ goto resolve_page_fault;
+ }
+
+ ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
+ &total_wqe_bytes, !requestor);
+ if (ret == -EAGAIN) {
+ goto resolve_page_fault;
+ } else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
+ mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
+ -ret);
+ resume_with_error = 1;
+ goto resolve_page_fault;
+ }
+
+resolve_page_fault:
+ mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
+ qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+
+ free_page((unsigned long)buffer);
+}
+
+static int pages_in_range(u64 address, u32 length)
+{
+ return (ALIGN(address + length, PAGE_SIZE) -
+ (address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_pfault *pfault)
+{
+ struct mlx5_pagefault *mpfault = &pfault->mpfault;
+ u64 address;
+ u32 length;
+ u32 prefetch_len = mpfault->bytes_committed;
+ int prefetch_activated = 0;
+ u32 rkey = mpfault->rdma.r_key;
+ int ret;
+
+ /* The RDMA responder handler handles the page fault in two parts.
+ * First it brings the necessary pages for the current packet
+ * (and uses the pfault context), and then (after resuming the QP)
+ * prefetches more pages. The second operation cannot use the pfault
+ * context and therefore uses the dummy_pfault context allocated on
+ * the stack */
+ struct mlx5_ib_pfault dummy_pfault = {};
+
+ dummy_pfault.mpfault.bytes_committed = 0;
+
+ mpfault->rdma.rdma_va += mpfault->bytes_committed;
+ mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+ mpfault->rdma.rdma_op_len);
+ mpfault->bytes_committed = 0;
+
+ address = mpfault->rdma.rdma_va;
+ length = mpfault->rdma.rdma_op_len;
+
+ /* For some operations, the hardware cannot tell the exact message
+ * length, and in those cases it reports zero. Use prefetch
+ * logic. */
+ if (length == 0) {
+ prefetch_activated = 1;
+ length = mpfault->rdma.packet_size;
+ prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+ }
+
+ ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+ NULL);
+ if (ret == -EAGAIN) {
+ /* We're racing with an invalidation, don't prefetch */
+ prefetch_activated = 0;
+ } else if (ret < 0 || pages_in_range(address, length) > ret) {
+ mlx5_ib_page_fault_resume(qp, pfault, 1);
+ return;
+ }
+
+ mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+ /* At this point, there might be a new pagefault already arriving in
+ * the eq; switch to the dummy pagefault for the rest of the
+ * processing. We're still OK with the objects being alive as the
+ * work-queue is being fenced. */
+
+ if (prefetch_activated) {
+ ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+ address,
+ prefetch_len,
+ NULL);
+ if (ret < 0) {
+ pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+ ret, prefetch_activated,
+ qp->ibqp.qp_num, address, prefetch_len);
+ }
+ }
+}
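
For RDMA responder faults whose total length the hardware cannot report, the handler maps only the faulting packet up front and queues a bounded prefetch for the rest once the QP has been resumed. A sketch of that length selection, mirroring the MAX_PREFETCH_LEN cap:

#include <stdio.h>
#include <stdint.h>

#define MAX_PREFETCH (4u * 1024 * 1024)	/* mirrors MAX_PREFETCH_LEN */

static void plan(uint32_t op_len, uint32_t packet_size, uint32_t committed)
{
	uint32_t length = op_len;
	uint32_t prefetch_len = committed;
	int prefetch = 0;

	if (length == 0) {
		/* Unknown total length: fault the packet now, prefetch later. */
		prefetch = 1;
		length = packet_size;
		if (prefetch_len > MAX_PREFETCH)
			prefetch_len = MAX_PREFETCH;
	}
	printf("map now: %u bytes, prefetch: %u bytes\n",
	       length, prefetch ? prefetch_len : 0);
}

int main(void)
{
	plan(8192, 1024, 4096);		/* known length: no prefetch       */
	plan(0, 1024, 16u << 20);	/* unknown length: capped prefetch */
	return 0;
}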
+
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_pfault *pfault)
+{
+ u8 event_subtype = pfault->mpfault.event_subtype;
+
+ switch (event_subtype) {
+ case MLX5_PFAULT_SUBTYPE_WQE:
+ mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
+ break;
+ case MLX5_PFAULT_SUBTYPE_RDMA:
+ mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+ break;
+ default:
+ pr_warn("Invalid page fault event subtype: 0x%x\n",
+ event_subtype);
+ mlx5_ib_page_fault_resume(qp, pfault, 1);
+ break;
+ }
+}
+
+static void mlx5_ib_qp_pfault_action(struct work_struct *work)
+{
+ struct mlx5_ib_pfault *pfault = container_of(work,
+ struct mlx5_ib_pfault,
+ work);
+ enum mlx5_ib_pagefault_context context =
+ mlx5_ib_get_pagefault_context(&pfault->mpfault);
+ struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
+ pagefaults[context]);
+ mlx5_ib_mr_pfault_handler(qp, pfault);
+}
+
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+ qp->disable_page_faults = 1;
+ spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+
+ /*
+ * Note that at this point, we are guaranteed that no more
+ * work queue elements will be posted to the work queue with
+ * the QP we are closing.
+ */
+ flush_workqueue(mlx5_ib_page_fault_wq);
+}
+
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+ qp->disable_page_faults = 0;
+ spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+}
+
+static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
+ struct mlx5_pagefault *pfault)
+{
+ /*
+ * Note that we will only get one fault event per QP per context
+ * (responder/initiator, read/write), until we resolve the page fault
+ * with the mlx5_ib_page_fault_resume command. Since this function is
+ * called from within the work element, there is no risk of missing
+ * events.
+ */
+ struct mlx5_ib_qp *mibqp = to_mibqp(qp);
+ enum mlx5_ib_pagefault_context context =
+ mlx5_ib_get_pagefault_context(pfault);
+ struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
+
+ qp_pfault->mpfault = *pfault;
+
+ /* No need to stop interrupts here since we are in an interrupt */
+ spin_lock(&mibqp->disable_page_faults_lock);
+ if (!mibqp->disable_page_faults)
+ queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
+ spin_unlock(&mibqp->disable_page_faults_lock);
+}
+
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
+{
+ int i;
+
+ qp->disable_page_faults = 1;
+ spin_lock_init(&qp->disable_page_faults_lock);
+
+ qp->mqp.pfault_handler = mlx5_ib_pfault_handler;
+
+ for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
+ INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+{
+ int ret;
+
+ ret = init_srcu_struct(&ibdev->mr_srcu);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+{
+ cleanup_srcu_struct(&ibdev->mr_srcu);
+}
+
+int __init mlx5_ib_odp_init(void)
+{
+ mlx5_ib_page_fault_wq =
+ create_singlethread_workqueue("mlx5_ib_page_faults");
+ if (!mlx5_ib_page_fault_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_ib_odp_cleanup(void)
+{
+ destroy_workqueue(mlx5_ib_page_fault_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8c574b63d77b..be0cd358b080 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -70,15 +70,6 @@ static const u32 mlx5_ib_opcode[] = {
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
};
-struct umr_wr {
- u64 virt_addr;
- struct ib_pd *pd;
- unsigned int page_shift;
- unsigned int npages;
- u32 length;
- int access_flags;
- u32 mkey;
-};
static int is_qp0(enum ib_qp_type qp_type)
{
@@ -110,6 +101,77 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
}
+/**
+ * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
+ *
+ * @qp: QP to copy from.
+ * @send: copy from the send queue when non-zero, use the receive queue
+ * otherwise.
+ * @wqe_index: index to start copying from. For send work queues, the
+ * wqe_index is in units of MLX5_SEND_WQE_BB.
+ * For receive work queues, it is the index of the work queue
+ * element in the queue.
+ * @buffer: destination buffer.
+ * @length: maximum number of bytes to copy.
+ *
+ * Copies at least a single WQE, but may copy more data.
+ *
+ * Return: the number of bytes copied, or an error code.
+ */
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+ void *buffer, u32 length)
+{
+ struct ib_device *ibdev = qp->ibqp.device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
+ size_t offset;
+ size_t wq_end;
+ struct ib_umem *umem = qp->umem;
+ u32 first_copy_length;
+ int wqe_length;
+ int ret;
+
+ if (wq->wqe_cnt == 0) {
+ mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
+ qp->ibqp.qp_type);
+ return -EINVAL;
+ }
+
+ offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
+ wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+
+ if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
+ return -EINVAL;
+
+ if (offset > umem->length ||
+ (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
+ return -EINVAL;
+
+ first_copy_length = min_t(u32, offset + length, wq_end) - offset;
+ ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
+ if (ret)
+ return ret;
+
+ if (send) {
+ struct mlx5_wqe_ctrl_seg *ctrl = buffer;
+ int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+
+ wqe_length = ds * MLX5_WQE_DS_UNITS;
+ } else {
+ wqe_length = 1 << wq->wqe_shift;
+ }
+
+ if (wqe_length <= first_copy_length)
+ return first_copy_length;
+
+ ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
+ wqe_length - first_copy_length);
+ if (ret)
+ return ret;
+
+ return wqe_length;
+}
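
Since the work queue is a ring buffer, a WQE near the end may wrap around to the start; the routine above therefore copies up to the end of the ring first and fetches the remainder from the ring's beginning only if the decoded WQE length requires it. A simplified user-space sketch of that two-part copy:

#include <stdio.h>
#include <string.h>

/* Copy 'wqe_len' bytes starting at 'offset' out of a ring of
 * 'ring_len' bytes, wrapping to the start of the ring if needed. */
static void read_ring_wqe(const char *ring, size_t ring_len,
			  size_t offset, size_t wqe_len, char *out)
{
	size_t first = ring_len - offset;	/* bytes before the ring end */

	if (first > wqe_len)
		first = wqe_len;
	memcpy(out, ring + offset, first);
	if (wqe_len > first)			/* WQE wrapped around */
		memcpy(out + first, ring, wqe_len - first);
}

int main(void)
{
	char ring[16] = "ABCDEFGHIJKLMNOP";
	char out[8];

	read_ring_wqe(ring, sizeof(ring), 12, 6, out);
	printf("%.6s\n", out);	/* prints MNOPAB */
	return 0;
}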
+
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
{
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
@@ -158,11 +220,13 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
{
+ struct mlx5_general_caps *gen;
int wqe_size;
int wq_size;
+ gen = &dev->mdev->caps.gen;
/* Sanity check RQ size before proceeding */
- if (cap->max_recv_wr > dev->mdev->caps.max_wqes)
+ if (cap->max_recv_wr > gen->max_wqes)
return -EINVAL;
if (!has_rq) {
@@ -182,10 +246,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
qp->rq.wqe_cnt = wq_size / wqe_size;
- if (wqe_size > dev->mdev->caps.max_rq_desc_sz) {
+ if (wqe_size > gen->max_rq_desc_sz) {
mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
wqe_size,
- dev->mdev->caps.max_rq_desc_sz);
+ gen->max_rq_desc_sz);
return -EINVAL;
}
qp->rq.wqe_shift = ilog2(wqe_size);
@@ -266,9 +330,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
struct mlx5_ib_qp *qp)
{
+ struct mlx5_general_caps *gen;
int wqe_size;
int wq_size;
+ gen = &dev->mdev->caps.gen;
if (!attr->cap.max_send_wr)
return 0;
@@ -277,9 +343,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
if (wqe_size < 0)
return wqe_size;
- if (wqe_size > dev->mdev->caps.max_sq_desc_sz) {
+ if (wqe_size > gen->max_sq_desc_sz) {
mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
- wqe_size, dev->mdev->caps.max_sq_desc_sz);
+ wqe_size, gen->max_sq_desc_sz);
return -EINVAL;
}
@@ -292,9 +358,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
- if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) {
+ if (qp->sq.wqe_cnt > gen->max_wqes) {
mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
- qp->sq.wqe_cnt, dev->mdev->caps.max_wqes);
+ qp->sq.wqe_cnt, gen->max_wqes);
return -ENOMEM;
}
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -309,11 +375,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
struct mlx5_ib_create_qp *ucmd)
{
+ struct mlx5_general_caps *gen;
int desc_sz = 1 << qp->sq.wqe_shift;
- if (desc_sz > dev->mdev->caps.max_sq_desc_sz) {
+ gen = &dev->mdev->caps.gen;
+ if (desc_sz > gen->max_sq_desc_sz) {
mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
- desc_sz, dev->mdev->caps.max_sq_desc_sz);
+ desc_sz, gen->max_sq_desc_sz);
return -EINVAL;
}
@@ -325,9 +393,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
qp->sq.wqe_cnt = ucmd->sq_wqe_count;
- if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) {
+ if (qp->sq.wqe_cnt > gen->max_wqes) {
mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
- qp->sq.wqe_cnt, dev->mdev->caps.max_wqes);
+ qp->sq.wqe_cnt, gen->max_wqes);
return -EINVAL;
}
@@ -641,7 +709,7 @@ err_unmap:
mlx5_ib_db_unmap_user(context, &qp->db);
err_free:
- mlx5_vfree(*in);
+ kvfree(*in);
err_umem:
if (qp->umem)
@@ -755,7 +823,7 @@ err_wrid:
kfree(qp->rq.wrid);
err_free:
- mlx5_vfree(*in);
+ kvfree(*in);
err_buf:
mlx5_buf_free(dev->mdev, &qp->buf);
@@ -803,16 +871,20 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
+ struct mlx5_general_caps *gen;
struct mlx5_ib_create_qp ucmd;
int inlen = sizeof(*in);
int err;
+ mlx5_ib_odp_create_qp(qp);
+
+ gen = &dev->mdev->caps.gen;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
+ if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
return -EINVAL;
} else {
@@ -851,9 +923,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_ib_dbg(dev, "invalid rq params\n");
return -EINVAL;
}
- if (ucmd.sq_wqe_count > dev->mdev->caps.max_wqes) {
+ if (ucmd.sq_wqe_count > gen->max_wqes) {
mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
- ucmd.sq_wqe_count, dev->mdev->caps.max_wqes);
+ ucmd.sq_wqe_count, gen->max_wqes);
return -EINVAL;
}
err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
@@ -963,7 +1035,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_create;
}
- mlx5_vfree(in);
+ kvfree(in);
/* Hardware wants QPN written in big-endian order (after
* shifting) for send doorbell. Precompute this value to save
* a little bit when posting sends.
@@ -980,7 +1052,7 @@ err_create:
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
- mlx5_vfree(in);
+ kvfree(in);
return err;
}
@@ -1003,9 +1075,14 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv
}
} else {
spin_lock_irq(&send_cq->lock);
+ __acquire(&recv_cq->lock);
}
} else if (recv_cq) {
spin_lock_irq(&recv_cq->lock);
+ __acquire(&send_cq->lock);
+ } else {
+ __acquire(&send_cq->lock);
+ __acquire(&recv_cq->lock);
}
}
@@ -1025,10 +1102,15 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
spin_unlock_irq(&recv_cq->lock);
}
} else {
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
}
} else if (recv_cq) {
+ __release(&send_cq->lock);
spin_unlock_irq(&recv_cq->lock);
+ } else {
+ __release(&recv_cq->lock);
+ __release(&send_cq->lock);
}
}
@@ -1080,11 +1162,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
- if (qp->state != IB_QPS_RESET)
+ if (qp->state != IB_QPS_RESET) {
+ mlx5_ib_qp_disable_pagefaults(qp);
if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
qp->mqp.qpn);
+ }
get_cqs(qp, &send_cq, &recv_cq);
@@ -1144,6 +1228,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
+ struct mlx5_general_caps *gen;
struct mlx5_ib_dev *dev;
struct mlx5_ib_qp *qp;
u16 xrcdn = 0;
@@ -1161,11 +1246,12 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
}
dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
}
+ gen = &dev->mdev->caps.gen;
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
case IB_QPT_XRC_INI:
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
+ if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) {
mlx5_ib_dbg(dev, "XRC not supported\n");
return ERR_PTR(-ENOSYS);
}
@@ -1272,6 +1358,9 @@ enum {
static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
{
+ struct mlx5_general_caps *gen;
+
+ gen = &dev->mdev->caps.gen;
if (rate == IB_RATE_PORT_CURRENT) {
return 0;
} else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
@@ -1279,7 +1368,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
} else {
while (rate != IB_RATE_2_5_GBPS &&
!(1 << (rate + MLX5_STAT_RATE_OFFSET) &
- dev->mdev->caps.stat_rate_support))
+ gen->stat_rate_support))
--rate;
}
@@ -1290,8 +1379,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr)
{
+ struct mlx5_general_caps *gen;
int err;
+ gen = &dev->mdev->caps.gen;
path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
@@ -1302,6 +1393,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
path->rlid = cpu_to_be16(ah->dlid);
if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) {
+ pr_err("sgid_index (%u) too large. max is %d\n",
+ ah->grh.sgid_index, gen->port[port - 1].gid_table_len);
+ return -EINVAL;
+ }
path->grh_mlid |= 1 << 7;
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
@@ -1317,22 +1413,6 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
path->static_rate = err;
path->port = port;
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) {
- pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len);
- return -EINVAL;
- }
-
- path->grh_mlid |= 1 << 7;
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
- path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
- }
-
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = attr->timeout << 3;
@@ -1492,6 +1572,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
+ struct mlx5_general_caps *gen;
struct mlx5_modify_qp_mbox_in *in;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
@@ -1500,6 +1581,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int mlx5_st;
int err;
+ gen = &dev->mdev->caps.gen;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1539,7 +1621,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
err = -EINVAL;
goto out;
}
- context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev->caps.log_max_msg;
+ context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg;
}
if (attr_mask & IB_QP_DEST_QPN)
@@ -1634,6 +1716,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (mlx5_st < 0)
goto out;
+ /* If moving to a reset or error state, we must disable page faults on
+ * this QP and flush all current page faults. Otherwise a stale page
+ * fault may attempt to work on this QP after it is reset and moved
+ * again to RTS, and may cause the driver and the device to get out of
+ * sync. */
+ if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ mlx5_ib_qp_disable_pagefaults(qp);
+
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
in->optparam = cpu_to_be32(optpar);
@@ -1643,6 +1734,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (err)
goto out;
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ mlx5_ib_qp_enable_pagefaults(qp);
+
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -1685,9 +1779,11 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
+ struct mlx5_general_caps *gen;
int err = -EINVAL;
int port;
+ gen = &dev->mdev->caps.gen;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
@@ -1699,21 +1795,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
if ((attr_mask & IB_QP_PORT) &&
- (attr->port_num == 0 || attr->port_num > dev->mdev->caps.num_ports))
+ (attr->port_num == 0 || attr->port_num > gen->num_ports))
goto out;
if (attr_mask & IB_QP_PKEY_INDEX) {
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >= dev->mdev->caps.port[port - 1].pkey_table_len)
+ if (attr->pkey_index >= gen->port[port - 1].pkey_table_len)
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic > dev->mdev->caps.max_ra_res_qp)
+ attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp))
goto out;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic > dev->mdev->caps.max_ra_req_qp)
+ attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp))
goto out;
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
@@ -1830,37 +1926,70 @@ static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
umr->mkey_mask = frwr_mkey_mask();
}
+static __be64 get_umr_reg_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_PD |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_A |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_unreg_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_mtt_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr)
{
- struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
- u64 mask;
+ struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
memset(umr, 0, sizeof(*umr));
+ if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+ umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
+ else
+ umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
+
if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
- umr->flags = 1 << 5; /* fail if not free */
umr->klm_octowords = get_klm_octo(umrwr->npages);
- mask = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_PD |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
- MLX5_MKEY_MASK_FREE;
- umr->mkey_mask = cpu_to_be64(mask);
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
+ umr->mkey_mask = get_umr_update_mtt_mask();
+ umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
+ umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+ } else {
+ umr->mkey_mask = get_umr_reg_mr_mask();
+ }
} else {
- umr->flags = 2 << 5; /* fail if free */
- mask = MLX5_MKEY_MASK_FREE;
- umr->mkey_mask = cpu_to_be64(mask);
+ umr->mkey_mask = get_umr_unreg_mr_mask();
}
if (!wr->num_sge)
- umr->flags |= (1 << 7); /* inline */
+ umr->flags |= MLX5_UMR_INLINE;
}
static u8 get_umr_flags(int acc)
@@ -1877,7 +2006,7 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
{
memset(seg, 0, sizeof(*seg));
if (li) {
- seg->status = 1 << 6;
+ seg->status = MLX5_MKEY_STATUS_FREE;
return;
}
@@ -1894,19 +2023,23 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
{
+ struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
- seg->status = 1 << 6;
+ seg->status = MLX5_MKEY_STATUS_FREE;
return;
}
- seg->flags = convert_access(wr->wr.fast_reg.access_flags);
- seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
- seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
- seg->len = cpu_to_be64(wr->wr.fast_reg.length);
- seg->log2_page_size = wr->wr.fast_reg.page_shift;
+ seg->flags = convert_access(umrwr->access_flags);
+ if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
+ seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
+ }
+ seg->len = cpu_to_be64(umrwr->length);
+ seg->log2_page_size = umrwr->page_shift;
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
- mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+ mlx5_mkey_variant(umrwr->mkey));
}
static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -2020,56 +2153,31 @@ static u8 bs_selector(int block_size)
}
}
-static int format_selector(struct ib_sig_attrs *attr,
- struct ib_sig_domain *domain,
- int *selector)
+static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
+ struct mlx5_bsf_inl *inl)
{
+ /* Valid inline section and allow BSF refresh */
+ inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
+ MLX5_BSF_REFRESH_DIF);
+ inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
+ inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
+ /* repeating block */
+ inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
+ inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
+ MLX5_DIF_CRC : MLX5_DIF_IPCS;
-#define FORMAT_DIF_NONE 0
-#define FORMAT_DIF_CRC_INC 8
-#define FORMAT_DIF_CRC_NO_INC 12
-#define FORMAT_DIF_CSUM_INC 13
-#define FORMAT_DIF_CSUM_NO_INC 14
+ if (domain->sig.dif.ref_remap)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
- switch (domain->sig.dif.type) {
- case IB_T10DIF_NONE:
- /* No DIF */
- *selector = FORMAT_DIF_NONE;
- break;
- case IB_T10DIF_TYPE1: /* Fall through */
- case IB_T10DIF_TYPE2:
- switch (domain->sig.dif.bg_type) {
- case IB_T10DIF_CRC:
- *selector = FORMAT_DIF_CRC_INC;
- break;
- case IB_T10DIF_CSUM:
- *selector = FORMAT_DIF_CSUM_INC;
- break;
- default:
- return 1;
- }
- break;
- case IB_T10DIF_TYPE3:
- switch (domain->sig.dif.bg_type) {
- case IB_T10DIF_CRC:
- *selector = domain->sig.dif.type3_inc_reftag ?
- FORMAT_DIF_CRC_INC :
- FORMAT_DIF_CRC_NO_INC;
- break;
- case IB_T10DIF_CSUM:
- *selector = domain->sig.dif.type3_inc_reftag ?
- FORMAT_DIF_CSUM_INC :
- FORMAT_DIF_CSUM_NO_INC;
- break;
- default:
- return 1;
- }
- break;
- default:
- return 1;
+ if (domain->sig.dif.app_escape) {
+ if (domain->sig.dif.ref_escape)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
+ else
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
}
- return 0;
+ inl->dif_app_bitmask_check =
+ cpu_to_be16(domain->sig.dif.apptag_check_mask);
}
static int mlx5_set_bsf(struct ib_mr *sig_mr,
@@ -2080,45 +2188,49 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
struct mlx5_bsf_basic *basic = &bsf->basic;
struct ib_sig_domain *mem = &sig_attrs->mem;
struct ib_sig_domain *wire = &sig_attrs->wire;
- int ret, selector;
memset(bsf, 0, sizeof(*bsf));
+
+ /* Basic + Extended + Inline */
+ basic->bsf_size_sbs = 1 << 7;
+ /* Input domain check byte mask */
+ basic->check_byte_mask = sig_attrs->check_mask;
+ basic->raw_data_size = cpu_to_be32(data_size);
+
+ /* Memory domain */
switch (sig_attrs->mem.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
case IB_SIG_TYPE_T10_DIF:
- if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
- return -EINVAL;
+ basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+ basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
+ mlx5_fill_inl_bsf(mem, &bsf->m_inl);
+ break;
+ default:
+ return -EINVAL;
+ }
- /* Input domain check byte mask */
- basic->check_byte_mask = sig_attrs->check_mask;
+ /* Wire domain */
+ switch (sig_attrs->wire.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
- mem->sig.dif.type == wire->sig.dif.type) {
+ mem->sig_type == wire->sig_type) {
/* Same block structure */
- basic->bsf_size_sbs = 1 << 4;
+ basic->bsf_size_sbs |= 1 << 4;
if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
- basic->wire.copy_byte_mask |= 0xc0;
+ basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
- basic->wire.copy_byte_mask |= 0x30;
+ basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
- basic->wire.copy_byte_mask |= 0x0f;
+ basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
} else
basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
- basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
- basic->raw_data_size = cpu_to_be32(data_size);
-
- ret = format_selector(sig_attrs, mem, &selector);
- if (ret)
- return -EINVAL;
- basic->m_bfs_psv = cpu_to_be32(selector << 24 |
- msig->psv_memory.psv_idx);
-
- ret = format_selector(sig_attrs, wire, &selector);
- if (ret)
- return -EINVAL;
- basic->w_bfs_psv = cpu_to_be32(selector << 24 |
- msig->psv_wire.psv_idx);
+ basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
+ mlx5_fill_inl_bsf(wire, &bsf->w_inl);
break;
-
default:
return -EINVAL;
}
@@ -2317,20 +2429,21 @@ static int set_psv_wr(struct ib_sig_domain *domain,
memset(psv_seg, 0, sizeof(*psv_seg));
psv_seg->psv_num = cpu_to_be32(psv_idx);
switch (domain->sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
case IB_SIG_TYPE_T10_DIF:
psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
domain->sig.dif.app_tag);
psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
-
- *seg += sizeof(*psv_seg);
- *size += sizeof(*psv_seg) / 16;
break;
-
default:
pr_err("Bad signature type given.\n");
return 1;
}
+ *seg += sizeof(*psv_seg);
+ *size += sizeof(*psv_seg) / 16;
+
return 0;
}
@@ -2423,7 +2536,7 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
- struct ib_send_wr *wr, int *idx,
+ struct ib_send_wr *wr, unsigned *idx,
int *size, int nreq)
{
int err = 0;
@@ -2749,6 +2862,8 @@ out:
if (bf->need_lock)
spin_lock(&bf->lock);
+ else
+ __acquire(&bf->lock);
/* TBD enable WC */
if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
@@ -2765,6 +2880,8 @@ out:
bf->offset ^= bf->buf_size;
if (bf->need_lock)
spin_unlock(&bf->lock);
+ else
+ __release(&bf->lock);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -2893,7 +3010,8 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
ib_ah_attr->port_num = path->port;
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ if (ib_ah_attr->port_num == 0 ||
+ ib_ah_attr->port_num > dev->caps.gen.num_ports)
return;
ib_ah_attr->sl = path->sl & 0xf;
@@ -2924,6 +3042,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
int mlx5_state;
int err = 0;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /*
+ * Wait for any outstanding page faults, in case the user frees memory
+ * based upon this query's result.
+ */
+ flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
mutex_lock(&qp->mutex);
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
if (!outb) {
@@ -3011,10 +3137,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_general_caps *gen;
struct mlx5_ib_xrcd *xrcd;
int err;
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC))
+ gen = &dev->mdev->caps.gen;
+ if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC))
return ERR_PTR(-ENOSYS);
xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
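The __acquire(&bf->lock)/__release(&bf->lock) pair added to the doorbell path above only affects sparse lock-context checking: when bf->need_lock is false the blue-flame register is owned exclusively, no lock is really taken, and the dummy annotations keep sparse's acquire/release accounting balanced on both branches. A minimal sketch of the pattern, where example_doorbell and ring_doorbell() are illustrative names and not code from this patch:

#include <linux/spinlock.h>

struct example_doorbell {
	spinlock_t lock;
	bool need_lock;		/* false when the caller owns the register exclusively */
};

static void ring_doorbell(struct example_doorbell *db)
{
	if (db->need_lock)
		spin_lock(&db->lock);
	else
		__acquire(&db->lock);	/* no real lock; balances sparse's context counter */

	/* ... write the doorbell / blue-flame register here ... */

	if (db->need_lock)
		spin_unlock(&db->lock);
	else
		__release(&db->lock);	/* matches the dummy __acquire above */
}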
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 70bd131ba646..41fec66217dd 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -141,7 +141,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
return 0;
err_in:
- mlx5_vfree(*in);
+ kvfree(*in);
err_umem:
ib_umem_release(srq->umem);
@@ -209,7 +209,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
return 0;
err_in:
- mlx5_vfree(*in);
+ kvfree(*in);
err_buf:
mlx5_buf_free(dev->mdev, &srq->buf);
@@ -238,6 +238,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_general_caps *gen;
struct mlx5_ib_srq *srq;
int desc_size;
int buf_size;
@@ -247,11 +248,12 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
int is_xrc;
u32 flgs, xrcdn;
+ gen = &dev->mdev->caps.gen;
/* Sanity check SRQ size before proceeding */
- if (init_attr->attr.max_wr >= dev->mdev->caps.max_srq_wqes) {
+ if (init_attr->attr.max_wr >= gen->max_srq_wqes) {
mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
init_attr->attr.max_wr,
- dev->mdev->caps.max_srq_wqes);
+ gen->max_srq_wqes);
return ERR_PTR(-EINVAL);
}
@@ -304,7 +306,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
in->ctx.db_record = cpu_to_be64(srq->db.dma);
err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen);
- mlx5_vfree(in);
+ kvfree(in);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
goto err_usr_kern_srq;
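The mlx5_vfree() calls above become plain kvfree(), which accepts a pointer obtained from either the slab allocator or vmalloc and chooses the matching free routine itself, so the driver no longer needs its own wrapper. A small sketch of the usual allocate-with-fallback/kvfree pairing, using hypothetical example_alloc()/example_free() helpers rather than anything from this driver:

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Try a physically contiguous allocation first, fall back to vmalloc. */
static void *example_alloc(size_t size)
{
	void *buf = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);

	if (!buf)
		buf = vzalloc(size);
	return buf;
}

/* kvfree() works for either allocation above. */
static void example_free(void *buf)
{
	kvfree(buf);
}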
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index fef067c959fc..c0d0296e7a00 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2341,9 +2341,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
" offset = %u, page size = %u.\n",
(unsigned long int)start, (unsigned long int)virt, (u32)length,
- region->offset, region->page_size);
+ ib_umem_offset(region), region->page_size);
- skip_pages = ((u32)region->offset) >> 12;
+ skip_pages = ((u32)ib_umem_offset(region)) >> 12;
if (ib_copy_from_udata(&req, udata, sizeof(req))) {
ib_umem_release(region);
@@ -2408,7 +2408,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
region_length -= skip_pages << 12;
for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
skip_pages = 0;
- if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
+ if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length)
goto enough_pages;
if ((page_count&0x01FF) == 0) {
if (page_count >= 1024 * 512) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index ac02ce4e8040..f3cc8c9e65ae 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -96,7 +96,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
union ib_gid sgid;
- u8 zmac[ETH_ALEN];
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
@@ -118,9 +117,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
goto av_conf_err;
}
- memset(&zmac, 0, ETH_ALEN);
- if (pd->uctx &&
- memcmp(attr->dmac, &zmac, ETH_ALEN)) {
+ if (pd->uctx) {
status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
attr->dmac, &attr->vlan_id);
if (status) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index dd35ae558ae1..638bff1ffc6c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -348,11 +348,6 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
return mqe;
}
-static void *ocrdma_alloc_mqe(void)
-{
- return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
-}
-
static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
{
dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
@@ -566,8 +561,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
- cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE);
- cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE);
+ cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
+ cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
cmd->async_cqid_ringsize = cq->id;
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -1189,10 +1184,10 @@ int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
{
struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
- struct ocrdma_rdma_stats_resp *old_stats = NULL;
+ struct ocrdma_rdma_stats_resp *old_stats;
int status;
- old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL);
+ old_stats = kmalloc(sizeof(*old_stats), GFP_KERNEL);
if (old_stats == NULL)
return -ENOMEM;
@@ -1235,10 +1230,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
struct mgmt_hba_attribs *hba_attribs;
- mqe = ocrdma_alloc_mqe();
+ mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
if (!mqe)
return status;
- memset(mqe, 0, sizeof(*mqe));
dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev,
@@ -2279,7 +2273,8 @@ mbx_err:
static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
- struct ib_qp_attr *attrs)
+ struct ib_qp_attr *attrs,
+ int attr_mask)
{
int status;
struct ib_ah_attr *ah_attr = &attrs->ah_attr;
@@ -2319,8 +2314,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
- vlan_id = ah_attr->vlan_id;
- if (vlan_id && (vlan_id < 0x1000)) {
+ if (attr_mask & IB_QP_VID) {
+ vlan_id = attrs->vlan_id;
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
@@ -2347,7 +2342,7 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
}
if (attr_mask & IB_QP_AV) {
- status = ocrdma_set_av_params(qp, cmd, attrs);
+ status = ocrdma_set_av_params(qp, cmd, attrs, attr_mask);
if (status)
return status;
} else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 256a06bc0b68..b0b2257b8e04 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -388,6 +388,15 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
}
+static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
+{
+ /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
+ union ib_gid *sgid = &dev->sgid_tbl[0];
+
+ sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ ocrdma_get_guid(dev, &sgid->raw[8]);
+}
+
static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev,
struct net_device *net)
{
@@ -434,6 +443,7 @@ static void ocrdma_init_gid_table(struct ocrdma_dev *dev)
rdma_vlan_dev_real_dev(net_dev) : net_dev;
if (real_dev == dev->nic_info.netdev) {
+ ocrdma_add_default_sgid(dev);
ocrdma_init_ipv4_gids(dev, net_dev);
ocrdma_init_ipv6_gids(dev, net_dev);
}
@@ -646,8 +656,10 @@ static int __init ocrdma_init_module(void)
return 0;
err_be_reg:
+#if IS_ENABLED(CONFIG_IPV6)
ocrdma_unregister_inet6addr_notifier();
err_notifier6:
+#endif
ocrdma_unregister_inetaddr_notifier();
return status;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 904989ec5eaa..4e036480c1a8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -28,8 +28,6 @@
#ifndef __OCRDMA_SLI_H__
#define __OCRDMA_SLI_H__
-#define Bit(_b) (1 << (_b))
-
enum {
OCRDMA_ASIC_GEN_SKH_R = 0x04,
OCRDMA_ASIC_GEN_LANCER = 0x0B
@@ -103,7 +101,7 @@ enum {
QTYPE_MCCQ = 3
};
-#define OCRDMA_MAX_SGID (8)
+#define OCRDMA_MAX_SGID 8
#define OCRDMA_MAX_QP 2048
#define OCRDMA_MAX_CQ 2048
@@ -128,33 +126,33 @@ enum {
#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */
/* qid #2 msbits at 12-11 */
#define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1
-#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */
+#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT 16 /* bits 16 - 28 */
/* Rearm bit */
-#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */
+#define OCRDMA_DB_CQ_REARM_SHIFT 29 /* bit 29 */
/* solicited bit */
-#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */
+#define OCRDMA_DB_CQ_SOLICIT_SHIFT 31 /* bit 31 */
#define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */
#define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */
-#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* qid bits 9-13 at 11-15 */
+#define OCRDMA_EQ_ID_EXT_MASK_SHIFT 2 /* qid bits 9-13 at 11-15 */
/* Clear the interrupt for this eq */
-#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */
+#define OCRDMA_EQ_CLR_SHIFT 9 /* bit 9 */
/* Must be 1 */
-#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */
+#define OCRDMA_EQ_TYPE_SHIFT 10 /* bit 10 */
/* Number of event entries processed */
-#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */
+#define OCRDMA_NUM_EQE_SHIFT 16 /* bits 16 - 28 */
/* Rearm bit */
-#define OCRDMA_REARM_SHIFT (29) /* bit 29 */
+#define OCRDMA_REARM_SHIFT 29 /* bit 29 */
#define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */
/* Number of entries posted */
-#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */
+#define OCRDMA_MQ_NUM_MQE_SHIFT 16 /* bits 16 - 29 */
-#define OCRDMA_MIN_HPAGE_SIZE (4096)
+#define OCRDMA_MIN_HPAGE_SIZE 4096
-#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
-#define OCRDMA_MAX_Q_PAGES (8)
+#define OCRDMA_MIN_Q_PAGE_SIZE 4096
+#define OCRDMA_MAX_Q_PAGES 8
#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C
#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF
@@ -170,14 +168,14 @@ enum {
# 6: 256K Bytes
# 7: 512K Bytes
*/
-#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8)
+#define OCRDMA_MAX_Q_PAGE_SIZE_CNT 8
#define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
-#define MAX_OCRDMA_QP_PAGES (8)
+#define MAX_OCRDMA_QP_PAGES 8
#define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE)
-#define OCRDMA_CREATE_CQ_MAX_PAGES (4)
-#define OCRDMA_DPP_CQE_SIZE (4)
+#define OCRDMA_CREATE_CQ_MAX_PAGES 4
+#define OCRDMA_DPP_CQE_SIZE 4
#define OCRDMA_GEN2_MAX_CQE 1024
#define OCRDMA_GEN2_CQ_PAGE_SIZE 4096
@@ -238,7 +236,7 @@ struct ocrdma_mqe_sge {
enum {
OCRDMA_MQE_HDR_EMB_SHIFT = 0,
- OCRDMA_MQE_HDR_EMB_MASK = Bit(0),
+ OCRDMA_MQE_HDR_EMB_MASK = BIT(0),
OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3,
OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT,
OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24,
@@ -292,7 +290,7 @@ struct ocrdma_pa {
u32 hi;
};
-#define MAX_OCRDMA_EQ_PAGES (8)
+#define MAX_OCRDMA_EQ_PAGES 8
struct ocrdma_create_eq_req {
struct ocrdma_mbx_hdr req;
u32 num_pages;
@@ -304,7 +302,7 @@ struct ocrdma_create_eq_req {
};
enum {
- OCRDMA_CREATE_EQ_VALID = Bit(29),
+ OCRDMA_CREATE_EQ_VALID = BIT(29),
OCRDMA_CREATE_EQ_CNT_SHIFT = 26,
OCRDMA_CREATE_CQ_DELAY_SHIFT = 13,
};
@@ -314,7 +312,7 @@ struct ocrdma_create_eq_rsp {
u32 vector_eqid;
};
-#define OCRDMA_EQ_MINOR_OTHER (0x1)
+#define OCRDMA_EQ_MINOR_OTHER 0x1
enum {
OCRDMA_MCQE_STATUS_SHIFT = 0,
@@ -322,13 +320,13 @@ enum {
OCRDMA_MCQE_ESTATUS_SHIFT = 16,
OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT,
OCRDMA_MCQE_CONS_SHIFT = 27,
- OCRDMA_MCQE_CONS_MASK = Bit(27),
+ OCRDMA_MCQE_CONS_MASK = BIT(27),
OCRDMA_MCQE_CMPL_SHIFT = 28,
- OCRDMA_MCQE_CMPL_MASK = Bit(28),
+ OCRDMA_MCQE_CMPL_MASK = BIT(28),
OCRDMA_MCQE_AE_SHIFT = 30,
- OCRDMA_MCQE_AE_MASK = Bit(30),
+ OCRDMA_MCQE_AE_MASK = BIT(30),
OCRDMA_MCQE_VALID_SHIFT = 31,
- OCRDMA_MCQE_VALID_MASK = Bit(31)
+ OCRDMA_MCQE_VALID_MASK = BIT(31)
};
struct ocrdma_mcqe {
@@ -339,13 +337,13 @@ struct ocrdma_mcqe {
};
enum {
- OCRDMA_AE_MCQE_QPVALID = Bit(31),
+ OCRDMA_AE_MCQE_QPVALID = BIT(31),
OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF,
- OCRDMA_AE_MCQE_CQVALID = Bit(31),
+ OCRDMA_AE_MCQE_CQVALID = BIT(31),
OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF,
- OCRDMA_AE_MCQE_VALID = Bit(31),
- OCRDMA_AE_MCQE_AE = Bit(30),
+ OCRDMA_AE_MCQE_VALID = BIT(31),
+ OCRDMA_AE_MCQE_AE = BIT(30),
OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16,
OCRDMA_AE_MCQE_EVENT_TYPE_MASK =
0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT,
@@ -386,9 +384,9 @@ enum {
OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF <<
OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT,
OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30,
- OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30),
+ OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = BIT(30),
OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31,
- OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31)
+ OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = BIT(31)
};
struct ocrdma_ae_mpa_mcqe {
@@ -412,9 +410,9 @@ enum {
OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF <<
OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT,
OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30,
- OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30),
+ OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = BIT(30),
OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31,
- OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31)
+ OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = BIT(31)
};
struct ocrdma_ae_qp_mcqe {
@@ -449,9 +447,9 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
/* mailbox command request and responses */
enum {
OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
- OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2),
+ OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = BIT(2),
OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3,
- OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3),
+ OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = BIT(3),
OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8,
OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF <<
OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT,
@@ -672,9 +670,9 @@ enum {
OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF,
OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12,
- OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12),
- OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14),
- OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15),
+ OCRDMA_CREATE_CQ_COALESCWM_MASK = BIT(13) | BIT(12),
+ OCRDMA_CREATE_CQ_FLAGS_NODELAY = BIT(14),
+ OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = BIT(15),
OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF,
OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF
@@ -687,8 +685,8 @@ enum {
OCRDMA_CREATE_CQ_EQID_SHIFT = 22,
OCRDMA_CREATE_CQ_CNT_SHIFT = 27,
- OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29),
- OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31),
+ OCRDMA_CREATE_CQ_FLAGS_VALID = BIT(29),
+ OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = BIT(31),
OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID |
OCRDMA_CREATE_CQ_FLAGS_EVENTABLE |
OCRDMA_CREATE_CQ_FLAGS_NODELAY
@@ -731,8 +729,8 @@ enum {
OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22,
OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16,
OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16,
- OCRDMA_CREATE_MQ_VALID = Bit(31),
- OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0)
+ OCRDMA_CREATE_MQ_VALID = BIT(31),
+ OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = BIT(0)
};
struct ocrdma_create_mq_req {
@@ -783,7 +781,7 @@ enum {
OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16,
OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19,
OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29,
- OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29),
+ OCRDMA_CREATE_QP_REQ_QPT_MASK = BIT(31) | BIT(30) | BIT(29),
OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0,
OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF,
@@ -798,23 +796,23 @@ enum {
OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT,
OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0,
- OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0),
+ OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = BIT(0),
OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1,
- OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1),
+ OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = BIT(1),
OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2,
- OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2),
+ OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = BIT(2),
OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3,
- OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3),
+ OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = BIT(3),
OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4,
- OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4),
+ OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = BIT(4),
OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5,
- OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5),
+ OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = BIT(5),
OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6,
- OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6),
+ OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = BIT(6),
OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7,
- OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7),
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = BIT(7),
OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8,
- OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8),
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = BIT(8),
OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16,
OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF <<
OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT,
@@ -927,7 +925,7 @@ enum {
OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF <<
OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT,
- OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0),
+ OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = BIT(0),
OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1,
OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF <<
OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT,
@@ -964,38 +962,38 @@ enum {
OCRDMA_MODIFY_QP_ID_SHIFT = 0,
OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF,
- OCRDMA_QP_PARA_QPS_VALID = Bit(0),
- OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1),
- OCRDMA_QP_PARA_PKEY_VALID = Bit(2),
- OCRDMA_QP_PARA_QKEY_VALID = Bit(3),
- OCRDMA_QP_PARA_PMTU_VALID = Bit(4),
- OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5),
- OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6),
- OCRDMA_QP_PARA_RRC_VALID = Bit(7),
- OCRDMA_QP_PARA_RQPSN_VALID = Bit(8),
- OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9),
- OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10),
- OCRDMA_QP_PARA_RNT_VALID = Bit(11),
- OCRDMA_QP_PARA_SQPSN_VALID = Bit(12),
- OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13),
- OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14),
- OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15),
- OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16),
- OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17),
- OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18),
- OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19),
- OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20),
- OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21),
- OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22),
- OCRDMA_QP_PARA_ZLKEY_EN_VALID = Bit(23),
- OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24),
- OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25),
- OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26),
-
- OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0),
- OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1),
- OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2),
- OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3)
+ OCRDMA_QP_PARA_QPS_VALID = BIT(0),
+ OCRDMA_QP_PARA_SQD_ASYNC_VALID = BIT(1),
+ OCRDMA_QP_PARA_PKEY_VALID = BIT(2),
+ OCRDMA_QP_PARA_QKEY_VALID = BIT(3),
+ OCRDMA_QP_PARA_PMTU_VALID = BIT(4),
+ OCRDMA_QP_PARA_ACK_TO_VALID = BIT(5),
+ OCRDMA_QP_PARA_RETRY_CNT_VALID = BIT(6),
+ OCRDMA_QP_PARA_RRC_VALID = BIT(7),
+ OCRDMA_QP_PARA_RQPSN_VALID = BIT(8),
+ OCRDMA_QP_PARA_MAX_IRD_VALID = BIT(9),
+ OCRDMA_QP_PARA_MAX_ORD_VALID = BIT(10),
+ OCRDMA_QP_PARA_RNT_VALID = BIT(11),
+ OCRDMA_QP_PARA_SQPSN_VALID = BIT(12),
+ OCRDMA_QP_PARA_DST_QPN_VALID = BIT(13),
+ OCRDMA_QP_PARA_MAX_WQE_VALID = BIT(14),
+ OCRDMA_QP_PARA_MAX_RQE_VALID = BIT(15),
+ OCRDMA_QP_PARA_SGE_SEND_VALID = BIT(16),
+ OCRDMA_QP_PARA_SGE_RECV_VALID = BIT(17),
+ OCRDMA_QP_PARA_SGE_WR_VALID = BIT(18),
+ OCRDMA_QP_PARA_INB_RDEN_VALID = BIT(19),
+ OCRDMA_QP_PARA_INB_WREN_VALID = BIT(20),
+ OCRDMA_QP_PARA_FLOW_LBL_VALID = BIT(21),
+ OCRDMA_QP_PARA_BIND_EN_VALID = BIT(22),
+ OCRDMA_QP_PARA_ZLKEY_EN_VALID = BIT(23),
+ OCRDMA_QP_PARA_FMR_EN_VALID = BIT(24),
+ OCRDMA_QP_PARA_INBAT_EN_VALID = BIT(25),
+ OCRDMA_QP_PARA_VLAN_EN_VALID = BIT(26),
+
+ OCRDMA_MODIFY_QP_FLAGS_RD = BIT(0),
+ OCRDMA_MODIFY_QP_FLAGS_WR = BIT(1),
+ OCRDMA_MODIFY_QP_FLAGS_SEND = BIT(2),
+ OCRDMA_MODIFY_QP_FLAGS_ATOMIC = BIT(3)
};
enum {
@@ -1014,15 +1012,15 @@ enum {
OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF <<
OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT,
- OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0),
- OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1),
- OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2),
- OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3),
- OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4),
+ OCRDMA_QP_PARAMS_FLAGS_FMR_EN = BIT(0),
+ OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = BIT(1),
+ OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = BIT(2),
+ OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = BIT(3),
+ OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = BIT(4),
OCRDMA_QP_PARAMS_STATE_SHIFT = 5,
- OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7),
- OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8),
- OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9),
+ OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7),
+ OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8),
+ OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9),
OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16,
OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF <<
OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
@@ -1277,7 +1275,7 @@ struct ocrdma_alloc_pd {
};
enum {
- OCRDMA_ALLOC_PD_RSP_DPP = Bit(16),
+ OCRDMA_ALLOC_PD_RSP_DPP = BIT(16),
OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20,
OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF,
};
@@ -1309,18 +1307,18 @@ enum {
OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF,
OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0,
- OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0),
+ OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = BIT(0),
OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1,
- OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1),
+ OCRDMA_ALLOC_LKEY_FMR_MASK = BIT(1),
OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2,
- OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2),
+ OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = BIT(2),
OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3,
- OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3),
+ OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = BIT(3),
OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4,
- OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4),
+ OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = BIT(4),
OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5,
- OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5),
- OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6),
+ OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = BIT(5),
+ OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = BIT(6),
OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6,
OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16,
OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF <<
@@ -1379,21 +1377,21 @@ enum {
OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF <<
OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT,
OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24,
- OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24),
+ OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = BIT(24),
OCRDMA_REG_NSMR_ZB_SHIFT = 25,
- OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25),
+ OCRDMA_REG_NSMR_ZB_SHIFT_MASK = BIT(25),
OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26,
- OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26),
+ OCRDMA_REG_NSMR_REMOTE_INV_MASK = BIT(26),
OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27,
- OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27),
+ OCRDMA_REG_NSMR_REMOTE_WR_MASK = BIT(27),
OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28,
- OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28),
+ OCRDMA_REG_NSMR_REMOTE_RD_MASK = BIT(28),
OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29,
- OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29),
+ OCRDMA_REG_NSMR_LOCAL_WR_MASK = BIT(29),
OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30,
- OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30),
+ OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = BIT(30),
OCRDMA_REG_NSMR_LAST_SHIFT = 31,
- OCRDMA_REG_NSMR_LAST_MASK = Bit(31)
+ OCRDMA_REG_NSMR_LAST_MASK = BIT(31)
};
struct ocrdma_reg_nsmr {
@@ -1420,7 +1418,7 @@ enum {
OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT,
OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31,
- OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31)
+ OCRDMA_REG_NSMR_CONT_LAST_MASK = BIT(31)
};
struct ocrdma_reg_nsmr_cont {
@@ -1566,7 +1564,7 @@ struct ocrdma_delete_ah_tbl_rsp {
enum {
OCRDMA_EQE_VALID_SHIFT = 0,
- OCRDMA_EQE_VALID_MASK = Bit(0),
+ OCRDMA_EQE_VALID_MASK = BIT(0),
OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE,
OCRDMA_EQE_RESOURCE_ID_SHIFT = 16,
OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF <<
@@ -1624,11 +1622,11 @@ enum {
OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT,
OCRDMA_CQE_STATUS_SHIFT = 16,
OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT,
- OCRDMA_CQE_VALID = Bit(31),
- OCRDMA_CQE_INVALIDATE = Bit(30),
- OCRDMA_CQE_QTYPE = Bit(29),
- OCRDMA_CQE_IMM = Bit(28),
- OCRDMA_CQE_WRITE_IMM = Bit(27),
+ OCRDMA_CQE_VALID = BIT(31),
+ OCRDMA_CQE_INVALIDATE = BIT(30),
+ OCRDMA_CQE_QTYPE = BIT(29),
+ OCRDMA_CQE_IMM = BIT(28),
+ OCRDMA_CQE_WRITE_IMM = BIT(27),
OCRDMA_CQE_QTYPE_SQ = 0,
OCRDMA_CQE_QTYPE_RQ = 1,
OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF
@@ -1772,8 +1770,8 @@ struct ocrdma_grh {
u16 rsvd;
} __packed;
-#define OCRDMA_AV_VALID Bit(7)
-#define OCRDMA_AV_VLAN_VALID Bit(1)
+#define OCRDMA_AV_VALID BIT(7)
+#define OCRDMA_AV_VLAN_VALID BIT(1)
struct ocrdma_av {
struct ocrdma_eth_vlan eth_hdr;
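The ocrdma_sli.h changes drop the driver-local Bit() helper in favour of the kernel-wide BIT() macro from <linux/bitops.h>, which shifts an unsigned long rather than a signed int; that matters for the bit-31 masks above, since (1 << 31) lands in the sign bit of a 32-bit int and sign-extends when widened, while BIT(31) stays 0x80000000. A minimal sketch of the distinction; EXAMPLE_VALID_MASK and example_cqe_is_valid() are made-up names echoing the masks above:

#include <linux/bitops.h>	/* BIT(nr) expands to (1UL << (nr)) */
#include <linux/types.h>

#define EXAMPLE_VALID_MASK	BIT(31)	/* unsigned long, unlike the old Bit(31) */
#define EXAMPLE_QTYPE_MASK	BIT(29)

static inline bool example_cqe_is_valid(u32 flags_status_srcqpn)
{
	return !!(flags_status_srcqpn & EXAMPLE_VALID_MASK);
}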
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 8f5f2577f288..fb8d8c4dfbb9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -388,7 +388,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
memset(&resp, 0, sizeof(resp));
resp.ah_tbl_len = ctx->ah_tbl.len;
- resp.ah_tbl_page = ctx->ah_tbl.pa;
+ resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
if (status)
@@ -805,7 +805,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
goto umem_err;
mr->hwmr.pbe_size = mr->umem->page_size;
- mr->hwmr.fbo = mr->umem->offset;
+ mr->hwmr.fbo = ib_umem_offset(mr->umem);
mr->hwmr.va = usr_addr;
mr->hwmr.len = len;
mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
@@ -870,7 +870,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
uresp.page_size = PAGE_ALIGN(cq->len);
uresp.num_pages = 1;
uresp.max_hw_cqe = cq->max_hw_cqe;
- uresp.page_addr[0] = cq->pa;
+ uresp.page_addr[0] = virt_to_phys(cq->va);
uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
uresp.db_page_size = dev->nic_info.db_page_size;
uresp.phase_change = cq->phase_change ? 1 : 0;
@@ -1123,13 +1123,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
uresp.sq_dbid = qp->sq.dbid;
uresp.num_sq_pages = 1;
uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
- uresp.sq_page_addr[0] = qp->sq.pa;
+ uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
uresp.num_wqe_allocated = qp->sq.max_cnt;
if (!srq) {
uresp.rq_dbid = qp->rq.dbid;
uresp.num_rq_pages = 1;
uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
- uresp.rq_page_addr[0] = qp->rq.pa;
+ uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
uresp.num_rqe_allocated = qp->rq.max_cnt;
}
uresp.db_page_addr = usr_db;
@@ -1410,6 +1410,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
mutex_unlock(&dev->dev_lock);
if (status)
goto mbx_err;
+ if (qp->qp_type == IB_QPT_UD)
+ qp_attr->qkey = params.qkey;
qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
qp_attr->path_mtu =
@@ -1680,7 +1682,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
memset(&uresp, 0, sizeof(uresp));
uresp.rq_dbid = srq->rq.dbid;
uresp.num_rq_pages = 1;
- uresp.rq_page_addr[0] = srq->rq.pa;
+ uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
uresp.rq_page_size = srq->rq.len;
uresp.db_page_addr = dev->nic_info.unmapped_db +
(srq->pd->id * dev->nic_info.db_page_size);
@@ -1870,7 +1872,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
if (unlikely(hdr->total_len > qp->max_inline_data)) {
pr_err("%s() supported_len=0x%x,\n"
- " unspported len req=0x%x\n", __func__,
+ " unsupported len req=0x%x\n", __func__,
qp->max_inline_data, hdr->total_len);
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index cab610ccd50e..81854586c081 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,7 +89,6 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- *dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 9bbb55347cc1..a77fb4fb14e4 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -258,7 +258,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
- mr->mr.offset = umem->offset;
+ mr->mr.offset = ib_umem_offset(umem);
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index f8dfd76be89f..db3588df3546 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -511,7 +511,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
} else {
- usnic_err("Failed to transistion %u from %s to %s",
+ usnic_err("Failed to transition %u from %s to %s",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 801a1d6937e4..417de1f32960 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -507,7 +507,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
if (err)
goto out_free_dev;
- if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) {
+ if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
usnic_err("IOMMU of %s does not support cache coherency\n",
dev_name(dev));
err = -EINVAL;