summary refs log tree commit diff
path: root/net/smc
diff options
context:
space:
mode:
Diffstat (limited to 'net/smc')
-rw-r--r-- net/smc/af_smc.c 103
-rw-r--r-- net/smc/smc_clc.c 1
-rw-r--r-- net/smc/smc_core.c 56
-rw-r--r-- net/smc/smc_core.h 21
-rw-r--r-- net/smc/smc_diag.c 16
-rw-r--r-- net/smc/smc_ib.c 2
-rw-r--r-- net/smc/smc_ib.h 7
-rw-r--r-- net/smc/smc_ism.c 1
-rw-r--r-- net/smc/smc_llc.c 19
-rw-r--r-- net/smc/smc_pnet.c 30
-rw-r--r-- net/smc/smc_tracepoint.h 23
-rw-r--r-- net/smc/smc_wr.c 15
12 files changed, 221 insertions, 73 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1c9289f56dc4..aa3bcaaeabf7 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -89,8 +89,8 @@ int smc_hash_sk(struct sock *sk)
write_lock_bh(&h->lock);
sk_add_node(sk, head);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return 0;
}
@@ -632,10 +632,12 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
static void smc_conn_abort(struct smc_sock *smc, int local_first)
{
+ struct smc_connection *conn = &smc->conn;
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_conn_free(conn);
if (local_first)
- smc_lgr_cleanup_early(&smc->conn);
- else
- smc_conn_free(&smc->conn);
+ smc_lgr_cleanup_early(lgr);
}
/* check if there is a rdma device available for this connection. */
@@ -2700,8 +2702,8 @@ static const struct proto_ops smc_sock_ops = {
.splice_read = smc_splice_read,
};
-static int smc_create(struct net *net, struct socket *sock, int protocol,
- int kern)
+static int __smc_create(struct net *net, struct socket *sock, int protocol,
+ int kern, struct socket *clcsock)
{
int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
struct smc_sock *smc;
@@ -2726,12 +2728,19 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
smc = smc_sk(sk);
smc->use_fallback = false; /* assume rdma capability first */
smc->fallback_rsn = 0;
- rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
- &smc->clcsock);
- if (rc) {
- sk_common_release(sk);
- goto out;
+
+ rc = 0;
+ if (!clcsock) {
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
+ if (rc) {
+ sk_common_release(sk);
+ goto out;
+ }
+ } else {
+ smc->clcsock = clcsock;
}
+
smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
@@ -2739,12 +2748,76 @@ out:
return rc;
}
+static int smc_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+{
+ return __smc_create(net, sock, protocol, kern, NULL);
+}
+
static const struct net_proto_family smc_sock_family_ops = {
.family = PF_SMC,
.owner = THIS_MODULE,
.create = smc_create,
};
+static int smc_ulp_init(struct sock *sk)
+{
+ struct socket *tcp = sk->sk_socket;
+ struct net *net = sock_net(sk);
+ struct socket *smcsock;
+ int protocol, ret;
+
+ /* only TCP can be replaced */
+ if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP ||
+ (sk->sk_family != AF_INET && sk->sk_family != AF_INET6))
+ return -ESOCKTNOSUPPORT;
+ /* don't handle wq now */
+ if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list)
+ return -ENOTCONN;
+
+ if (sk->sk_family == AF_INET)
+ protocol = SMCPROTO_SMC;
+ else
+ protocol = SMCPROTO_SMC6;
+
+ smcsock = sock_alloc();
+ if (!smcsock)
+ return -ENFILE;
+
+ smcsock->type = SOCK_STREAM;
+ __module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */
+ ret = __smc_create(net, smcsock, protocol, 1, tcp);
+ if (ret) {
+ sock_release(smcsock); /* module_put() which ops won't be NULL */
+ return ret;
+ }
+
+ /* replace tcp socket to smc */
+ smcsock->file = tcp->file;
+ smcsock->file->private_data = smcsock;
+ smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */
+ smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */
+ tcp->file = NULL;
+
+ return ret;
+}
+
+static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
+ const gfp_t priority)
+{
+ struct inet_connection_sock *icsk = inet_csk(newsk);
+
+ /* don't inherit ulp ops to child when listen */
+ icsk->icsk_ulp_ops = NULL;
+}
+
+static struct tcp_ulp_ops smc_ulp_ops __read_mostly = {
+ .name = "smc",
+ .owner = THIS_MODULE,
+ .init = smc_ulp_init,
+ .clone = smc_ulp_clone,
+};
+
unsigned int smc_net_id;
static __net_init int smc_net_init(struct net *net)
@@ -2855,6 +2928,12 @@ static int __init smc_init(void)
goto out_sock;
}
+ rc = tcp_register_ulp(&smc_ulp_ops);
+ if (rc) {
+ pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
+ goto out_sock;
+ }
+
static_branch_enable(&tcp_have_smc);
return 0;
@@ -2883,6 +2962,7 @@ out_pernet_subsys:
static void __exit smc_exit(void)
{
static_branch_disable(&tcp_have_smc);
+ tcp_unregister_ulp(&smc_ulp_ops);
sock_unregister(PF_SMC);
smc_core_exit();
smc_ib_unregister_client();
@@ -2905,3 +2985,4 @@ MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
+MODULE_ALIAS_TCP_ULP("smc");
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8409ab71a5e4..6be95a2a7b25 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1021,7 +1021,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
struct smc_link *link = conn->lnk;
/* SMC-R specific settings */
- link = conn->lnk;
memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
clc->hdr.typev1 = SMC_TYPE_R;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index a6849362f4dd..8935ef4811b0 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -171,8 +171,10 @@ static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
if (!conn->lgr->is_smcd) {
rc = smcr_lgr_conn_assign_link(conn, first);
- if (rc)
+ if (rc) {
+ conn->lgr = NULL;
return rc;
+ }
}
/* find a new alert_token_local value not yet used by some connection
* in this link group
@@ -348,6 +350,9 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
+ lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
+ goto errattr;
memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
@@ -622,15 +627,13 @@ int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-void smc_lgr_cleanup_early(struct smc_connection *conn)
+void smc_lgr_cleanup_early(struct smc_link_group *lgr)
{
- struct smc_link_group *lgr = conn->lgr;
spinlock_t *lgr_lock;
if (!lgr)
return;
- smc_conn_free(conn);
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
/* do not use this link group for new connections */
@@ -897,6 +900,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
smc_wr_free_lgr_mem(lgr);
goto free_wq;
}
+ lgr->net = smc_ib_net(lnk->smcibdev);
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
@@ -1101,18 +1105,24 @@ static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
smc_buf_free(lgr, true, rmb_desc);
} else {
rmb_desc->used = 0;
+ memset(rmb_desc->cpu_addr, 0, rmb_desc->len);
}
}
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
- if (conn->sndbuf_desc)
+ if (conn->sndbuf_desc) {
conn->sndbuf_desc->used = 0;
- if (conn->rmb_desc && lgr->is_smcd)
+ memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len);
+ }
+ if (conn->rmb_desc && lgr->is_smcd) {
conn->rmb_desc->used = 0;
- else if (conn->rmb_desc)
+ memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len +
+ sizeof(struct smcd_cdc_msg));
+ } else if (conn->rmb_desc) {
smcr_buf_unuse(conn->rmb_desc, lgr);
+ }
}
/* remove a finished connection from its link group */
@@ -1542,9 +1552,9 @@ void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
lgr_type = "ASYMMETRIC_LOCAL";
break;
}
- pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
+ pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
"%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
- lgr_type, lgr->pnet_id);
+ lgr->net->net_cookie, lgr_type, lgr->pnet_id);
}
/* set new lgr type and tag a link as asymmetric */
@@ -1579,7 +1589,8 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
SMC_MAX_PNETID_LEN) ||
lgr->type == SMC_LGR_SYMMETRIC ||
- lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
+ !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
continue;
/* trigger local add link processing */
@@ -1737,8 +1748,10 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
u8 peer_systemid[],
u8 peer_gid[],
u8 peer_mac_v1[],
- enum smc_lgr_role role, u32 clcqpn)
+ enum smc_lgr_role role, u32 clcqpn,
+ struct net *net)
{
+ struct smc_link *lnk;
int i;
if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
@@ -1746,12 +1759,17 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
return false;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (!smc_link_active(&lgr->lnk[i]))
+ lnk = &lgr->lnk[i];
+
+ if (!smc_link_active(lnk))
continue;
- if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
- !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) &&
+ /* use verbs API to check netns, instead of lgr->net */
+ if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
+ return false;
+ if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
+ !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
(smcr_version == SMC_V2 ||
- !memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN)))
+ !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
return true;
}
return false;
@@ -1767,6 +1785,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
+ struct net *net = sock_net(&smc->sk);
struct list_head *lgr_list;
struct smc_link_group *lgr;
enum smc_lgr_role role;
@@ -1793,7 +1812,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
smcr_lgr_match(lgr, ini->smcr_version,
ini->peer_systemid,
ini->peer_gid, ini->peer_mac, role,
- ini->ib_clcqpn)) &&
+ ini->ib_clcqpn, net)) &&
!lgr->sync_err &&
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
@@ -1832,8 +1851,10 @@ create:
write_lock_bh(&lgr->conns_lock);
rc = smc_lgr_register_conn(conn, true);
write_unlock_bh(&lgr->conns_lock);
- if (rc)
+ if (rc) {
+ smc_lgr_cleanup_early(lgr);
goto out;
+ }
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
@@ -2163,7 +2184,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (buf_desc) {
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
- memset(buf_desc->cpu_addr, 0, bufsize);
break; /* found reusable slot */
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index d63b08274197..521c64a3d8d3 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -306,6 +306,8 @@ struct smc_link_group {
u8 nexthop_mac[ETH_ALEN];
u8 uses_gateway;
__be32 saddr;
+ /* net namespace */
+ struct net *net;
};
struct { /* SMC-D */
u64 peer_gid;
@@ -407,7 +409,13 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
-/* returns true if the specified link is usable */
+/*
+ * Returns true if the specified link is usable.
+ *
+ * Usable means the link is ready to receive RDMA messages, map memory
+ * on the link, etc. This doesn't ensure we are able to send RDMA messages
+ * on this link; if sending RDMA messages is needed, use smc_link_sendable().
+ */
static inline bool smc_link_usable(struct smc_link *lnk)
{
if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE)
@@ -415,6 +423,15 @@ static inline bool smc_link_usable(struct smc_link *lnk)
return true;
}
+/*
+ * Returns true if the specified link is ready to receive AND send RDMA
+ * messages.
+ *
+ * For the client side in first contact, the underlying QP may still be in
+ * RESET or RTR when the link state is ACTIVATING, so the smc_link_usable()
+ * checks are not strong enough. Callers that need to send any CDC or LLC
+ * messages must use smc_link_sendable(); others can use smc_link_usable().
+ */
static inline bool smc_link_sendable(struct smc_link *lnk)
{
return smc_link_usable(lnk) &&
@@ -468,7 +485,7 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev,
struct smc_sock;
struct smc_clc_msg_accept_confirm;
-void smc_lgr_cleanup_early(struct smc_connection *conn);
+void smc_lgr_cleanup_early(struct smc_link_group *lgr);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index c952986a6aca..7c8dad28c18d 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -145,19 +145,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
+ struct smc_link *link = smc->conn.lnk;
+ struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net);
+
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
- .lnk[0].ibport = smc->conn.lnk->ibport,
- .lnk[0].link_id = smc->conn.lnk->link_id,
+ .lnk[0].ibport = link->ibport,
+ .lnk[0].link_id = link->link_id,
+ .lnk[0].net_cookie = net->net_cookie,
};
memcpy(linfo.lnk[0].ibname,
smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
- sizeof(smc->conn.lnk->smcibdev->ibdev->name));
- smc_gid_be16_convert(linfo.lnk[0].gid,
- smc->conn.lnk->gid);
- smc_gid_be16_convert(linfo.lnk[0].peer_gid,
- smc->conn.lnk->peer_gid);
+ sizeof(link->smcibdev->ibdev->name));
+ smc_gid_be16_convert(linfo.lnk[0].gid, link->gid);
+ smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid);
if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
goto errout;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index fe5d5399c4e8..a3e2d3b89568 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -12,6 +12,8 @@
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/scatterlist.h>
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index bfa1c6bf6313..5d8b49c57f50 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -69,6 +69,13 @@ static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
return cpu_to_be32(INADDR_NONE);
}
+static inline struct net *smc_ib_net(struct smc_ib_device *smcibdev)
+{
+ if (smcibdev && smcibdev->ibdev)
+ return read_pnet(&smcibdev->ibdev->coredev.rdma_net);
+ return NULL;
+}
+
struct smc_init_info_smcrv2;
struct smc_buf_desc;
struct smc_link;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index fd28cc498b98..a2084ecdb97e 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -6,6 +6,7 @@
* Copyright IBM Corp. 2018
*/
+#include <linux/if_vlan.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/slab.h>
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 3e9fd8a3124c..c4d057b2941d 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -242,9 +242,10 @@ static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
}
/* drop parallel or already-in-progress llc requests */
if (flow_type != msg_type)
- pr_warn_once("smc: SMC-R lg %*phN dropped parallel "
+ pr_warn_once("smc: SMC-R lg %*phN net %llu dropped parallel "
"LLC msg: msg %d flow %d role %d\n",
SMC_LGR_ID_SIZE, &lgr->id,
+ lgr->net->net_cookie,
qentry->msg.raw.hdr.common.type,
flow_type, lgr->role);
kfree(qentry);
@@ -359,9 +360,10 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
smc_llc_flow_qentry_clr(flow));
return NULL;
}
- pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: "
+ pr_warn_once("smc: SMC-R lg %*phN net %llu dropped unexpected LLC msg: "
"msg %d exp %d flow %d role %d flags %x\n",
- SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg,
+ SMC_LGR_ID_SIZE, &lgr->id, lgr->net->net_cookie,
+ rcv_msg, exp_msg,
flow->type, lgr->role,
flow->qentry->msg.raw.hdr.flags);
smc_llc_flow_qentry_del(flow);
@@ -1816,8 +1818,9 @@ finish:
static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
{
- pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: "
- "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type);
+ pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu LLC protocol violation: "
+ "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id,
+ lgr->net->net_cookie, type);
smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
smc_lgr_terminate_sched(lgr);
}
@@ -2146,9 +2149,10 @@ int smc_llc_link_init(struct smc_link *link)
void smc_llc_link_active(struct smc_link *link)
{
- pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, "
+ pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link added: id %*phN, "
"peerid %*phN, ibdev %s, ibport %d\n",
SMC_LGR_ID_SIZE, &link->lgr->id,
+ link->lgr->net->net_cookie,
SMC_LGR_ID_SIZE, &link->link_uid,
SMC_LGR_ID_SIZE, &link->peer_link_uid,
link->smcibdev->ibdev->name, link->ibport);
@@ -2164,9 +2168,10 @@ void smc_llc_link_active(struct smc_link *link)
void smc_llc_link_clear(struct smc_link *link, bool log)
{
if (log)
- pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN"
+ pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link removed: id %*phN"
", peerid %*phN, ibdev %s, ibport %d\n",
SMC_LGR_ID_SIZE, &link->lgr->id,
+ link->lgr->net->net_cookie,
SMC_LGR_ID_SIZE, &link->link_uid,
SMC_LGR_ID_SIZE, &link->peer_link_uid,
link->smcibdev->ibdev->name, link->ibport);
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 67e9d9fde085..db9825c01e0a 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -64,6 +64,7 @@ struct smc_pnetentry {
struct {
char eth_name[IFNAMSIZ + 1];
struct net_device *ndev;
+ netdevice_tracker dev_tracker;
};
struct {
char ib_name[IB_DEVICE_NAME_MAX + 1];
@@ -119,7 +120,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
list_del(&pnetelem->list);
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
- dev_put(pnetelem->ndev);
+ dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
pr_warn_ratelimited("smc: net device %s "
"erased user defined "
"pnetid %.16s\n",
@@ -195,7 +196,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
!strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
- dev_hold(ndev);
+ dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC);
pnetelem->ndev = ndev;
rc = 0;
pr_warn_ratelimited("smc: adding net device %s with "
@@ -226,7 +227,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
- dev_put(pnetelem->ndev);
+ dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
pnetelem->ndev = NULL;
rc = 0;
pr_warn_ratelimited("smc: removing net device %s with "
@@ -368,7 +369,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
new_pe->ndev = ndev;
-
+ netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
rc = -EEXIST;
new_netdev = true;
write_lock(&pnettable->lock);
@@ -976,14 +977,16 @@ static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i,
/* find a roce device for the given pnetid */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_init_info *ini,
- struct smc_ib_device *known_dev)
+ struct smc_ib_device *known_dev,
+ struct net *net)
{
struct smc_ib_device *ibdev;
int i;
mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
- if (ibdev == known_dev)
+ if (ibdev == known_dev ||
+ !rdma_dev_access_netns(ibdev->ibdev, net))
continue;
for (i = 1; i <= SMC_MAX_PORTS; i++) {
if (!rdma_is_port_valid(ibdev->ibdev, i))
@@ -1000,12 +1003,14 @@ out:
mutex_unlock(&smc_ib_devices.mutex);
}
-/* find alternate roce device with same pnet_id and vlan_id */
+/* find alternate roce device with same pnet_id, vlan_id and net namespace */
void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
struct smc_init_info *ini,
struct smc_ib_device *known_dev)
{
- _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev);
+ struct net *net = lgr->net;
+
+ _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev, net);
}
/* if handshake network device belongs to a roce device, return its
@@ -1014,6 +1019,7 @@ void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
struct smc_init_info *ini)
{
+ struct net *net = dev_net(netdev);
struct smc_ib_device *ibdev;
mutex_lock(&smc_ib_devices.mutex);
@@ -1021,6 +1027,10 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
struct net_device *ndev;
int i;
+ /* check rdma net namespace */
+ if (!rdma_dev_access_netns(ibdev->ibdev, net))
+ continue;
+
for (i = 1; i <= SMC_MAX_PORTS; i++) {
if (!rdma_is_port_valid(ibdev->ibdev, i))
continue;
@@ -1051,15 +1061,17 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+ struct net *net;
ndev = pnet_find_base_ndev(ndev);
+ net = dev_net(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid) &&
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
- _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL);
+ _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net);
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
index ec17f29646f5..9fc5e586d24a 100644
--- a/net/smc/smc_tracepoint.h
+++ b/net/smc/smc_tracepoint.h
@@ -22,6 +22,7 @@ TRACE_EVENT(smc_switch_to_fallback,
TP_STRUCT__entry(
__field(const void *, sk)
__field(const void *, clcsk)
+ __field(u64, net_cookie)
__field(int, fallback_rsn)
),
@@ -31,11 +32,13 @@ TRACE_EVENT(smc_switch_to_fallback,
__entry->sk = sk;
__entry->clcsk = clcsk;
+ __entry->net_cookie = sock_net(sk)->net_cookie;
__entry->fallback_rsn = fallback_rsn;
),
- TP_printk("sk=%p clcsk=%p fallback_rsn=%d",
- __entry->sk, __entry->clcsk, __entry->fallback_rsn)
+ TP_printk("sk=%p clcsk=%p net=%llu fallback_rsn=%d",
+ __entry->sk, __entry->clcsk,
+ __entry->net_cookie, __entry->fallback_rsn)
);
DECLARE_EVENT_CLASS(smc_msg_event,
@@ -46,19 +49,23 @@ DECLARE_EVENT_CLASS(smc_msg_event,
TP_STRUCT__entry(
__field(const void *, smc)
+ __field(u64, net_cookie)
__field(size_t, len)
__string(name, smc->conn.lnk->ibname)
),
TP_fast_assign(
+ const struct sock *sk = &smc->sk;
+
__entry->smc = smc;
+ __entry->net_cookie = sock_net(sk)->net_cookie;
__entry->len = len;
__assign_str(name, smc->conn.lnk->ibname);
),
- TP_printk("smc=%p len=%zu dev=%s",
- __entry->smc, __entry->len,
- __get_str(name))
+ TP_printk("smc=%p net=%llu len=%zu dev=%s",
+ __entry->smc, __entry->net_cookie,
+ __entry->len, __get_str(name))
);
DEFINE_EVENT(smc_msg_event, smc_tx_sendmsg,
@@ -84,6 +91,7 @@ TRACE_EVENT(smcr_link_down,
TP_STRUCT__entry(
__field(const void *, lnk)
__field(const void *, lgr)
+ __field(u64, net_cookie)
__field(int, state)
__string(name, lnk->ibname)
__field(void *, location)
@@ -94,13 +102,14 @@ TRACE_EVENT(smcr_link_down,
__entry->lnk = lnk;
__entry->lgr = lgr;
+ __entry->net_cookie = lgr->net->net_cookie;
__entry->state = lnk->state;
__assign_str(name, lnk->ibname);
__entry->location = location;
),
- TP_printk("lnk=%p lgr=%p state=%d dev=%s location=%pS",
- __entry->lnk, __entry->lgr,
+ TP_printk("lnk=%p lgr=%p net=%llu state=%d dev=%s location=%pS",
+ __entry->lnk, __entry->lgr, __entry->net_cookie,
__entry->state, __get_str(name),
__entry->location)
);
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index c6cfdea8b71b..24be1d03fef9 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -54,11 +54,7 @@ struct smc_wr_tx_pend { /* control data for a pending send request */
/* returns true if at least one tx work request is pending on the given link */
static inline bool smc_wr_is_tx_pend(struct smc_link *link)
{
- if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) !=
- link->wr_tx_cnt) {
- return true;
- }
- return false;
+ return !bitmap_empty(link->wr_tx_mask, link->wr_tx_cnt);
}
/* wait till all pending tx work requests on the given link are completed */
@@ -696,7 +692,7 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
lnk->wr_tx_compl = NULL;
kfree(lnk->wr_tx_pends);
lnk->wr_tx_pends = NULL;
- kfree(lnk->wr_tx_mask);
+ bitmap_free(lnk->wr_tx_mask);
lnk->wr_tx_mask = NULL;
kfree(lnk->wr_tx_sges);
lnk->wr_tx_sges = NULL;
@@ -772,9 +768,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_rx_sges)
goto no_mem_wr_tx_sges;
- link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT),
- sizeof(*link->wr_tx_mask),
- GFP_KERNEL);
+ link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL);
if (!link->wr_tx_mask)
goto no_mem_wr_rx_sges;
link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
@@ -887,8 +881,7 @@ int smc_wr_create_link(struct smc_link *lnk)
goto dma_unmap;
}
smc_wr_init_sge(lnk);
- memset(lnk->wr_tx_mask, 0,
- BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
+ bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT);
init_waitqueue_head(&lnk->wr_tx_wait);
atomic_set(&lnk->wr_tx_refcnt, 0);
init_waitqueue_head(&lnk->wr_reg_wait);