summaryrefslogtreecommitdiff
path: root/net/smc
diff options
context:
space:
mode:
Diffstat (limited to 'net/smc')
-rw-r--r--net/smc/Kconfig1
-rw-r--r--net/smc/Makefile1
-rw-r--r--net/smc/af_smc.c541
-rw-r--r--net/smc/smc.h12
-rw-r--r--net/smc/smc_cdc.c7
-rw-r--r--net/smc/smc_clc.c23
-rw-r--r--net/smc/smc_clc.h20
-rw-r--r--net/smc/smc_close.c120
-rw-r--r--net/smc/smc_close.h3
-rw-r--r--net/smc/smc_core.c547
-rw-r--r--net/smc/smc_core.h41
-rw-r--r--net/smc/smc_diag.c1
-rw-r--r--net/smc/smc_ib.c40
-rw-r--r--net/smc/smc_ib.h4
-rw-r--r--net/smc/smc_ism.c32
-rw-r--r--net/smc/smc_llc.c11
-rw-r--r--net/smc/smc_pnet.c68
-rw-r--r--net/smc/smc_pnet.h7
-rw-r--r--net/smc/smc_rx.c37
-rw-r--r--net/smc/smc_tx.c34
-rw-r--r--net/smc/smc_wr.c45
-rw-r--r--net/smc/smc_wr.h10
22 files changed, 1036 insertions, 569 deletions
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index c717ef0896aa..f54a70b8da82 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config SMC
tristate "SMC socket protocol family"
depends on INET && INFINIBAND
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 4df96b4b8130..cb1254541f37 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SMC) += smc.o
obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 77ef53596d18..b997072c72e5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -24,6 +25,7 @@
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
+#include <linux/rcupdate_wait.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -122,31 +124,16 @@ struct proto smc_proto6 = {
};
EXPORT_SYMBOL_GPL(smc_proto6);
-static int smc_release(struct socket *sock)
+static void smc_restore_fallback_changes(struct smc_sock *smc)
{
- struct sock *sk = sock->sk;
- struct smc_sock *smc;
- int rc = 0;
-
- if (!sk)
- goto out;
-
- smc = smc_sk(sk);
-
- /* cleanup for a dangling non-blocking connect */
- if (smc->connect_info && sk->sk_state == SMC_INIT)
- tcp_abort(smc->clcsock->sk, ECONNABORTED);
- flush_work(&smc->connect_work);
- kfree(smc->connect_info);
- smc->connect_info = NULL;
+ smc->clcsock->file->private_data = smc->sk.sk_socket;
+ smc->clcsock->file = NULL;
+}
- if (sk->sk_state == SMC_LISTEN)
- /* smc_close_non_accepted() is called and acquires
- * sock lock for child sockets again
- */
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- else
- lock_sock(sk);
+static int __smc_release(struct smc_sock *smc)
+{
+ struct sock *sk = &smc->sk;
+ int rc = 0;
if (!smc->use_fallback) {
rc = smc_close_active(smc);
@@ -161,26 +148,57 @@ static int smc_release(struct socket *sock)
}
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
+ smc_restore_fallback_changes(smc);
}
sk->sk_prot->unhash(sk);
if (sk->sk_state == SMC_CLOSED) {
if (smc->clcsock) {
- mutex_lock(&smc->clcsock_release_lock);
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- mutex_unlock(&smc->clcsock_release_lock);
+ release_sock(sk);
+ smc_clcsock_release(smc);
+ lock_sock(sk);
}
if (!smc->use_fallback)
smc_conn_free(&smc->conn);
}
+ return rc;
+}
+
+static int smc_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct smc_sock *smc;
+ int rc = 0;
+
+ if (!sk)
+ goto out;
+
+ sock_hold(sk); /* sock_put below */
+ smc = smc_sk(sk);
+
+ /* cleanup for a dangling non-blocking connect */
+ if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
+ flush_work(&smc->connect_work);
+
+ if (sk->sk_state == SMC_LISTEN)
+ /* smc_close_non_accepted() is called and acquires
+ * sock lock for child sockets again
+ */
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ else
+ lock_sock(sk);
+
+ rc = __smc_release(smc);
+
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
sock_put(sk); /* final sock_put */
out:
return rc;
@@ -255,7 +273,7 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
/* Check if socket is already active */
rc = -EINVAL;
- if (sk->sk_state != SMC_INIT)
+ if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
goto out_rel;
smc->clcsock->sk->sk_reuse = sk->sk_reuse;
@@ -446,12 +464,22 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_switch_to_fallback(struct smc_sock *smc)
+{
+ smc->use_fallback = true;
+ if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
+ smc->clcsock->file = smc->sk.sk_socket->file;
+ smc->clcsock->file->private_data = smc->clcsock;
+ }
+}
+
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc->use_fallback = true;
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = reason_code;
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
return 0;
@@ -491,46 +519,41 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
mutex_unlock(&smc_client_lgr_pending);
smc_conn_free(&smc->conn);
+ smc->connect_nonblock = 0;
return reason_code;
}
/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
-static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
- u8 *ibport, unsigned short vlan_id, u8 gid[])
+static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
- int reason_code = 0;
-
/* PNET table look up: search active ib_device and port
* within same PNETID that also contains the ethernet device
* used for the internal TCP socket
*/
- smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
- gid);
- if (!(*ibdev))
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-
- return reason_code;
+ smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
+ if (!ini->ib_dev)
+ return SMC_CLC_DECL_NOSMCRDEV;
+ return 0;
}
/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
-static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
/* Find ISM device with same PNETID as connecting interface */
- smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
- if (!(*ismdev))
- return SMC_CLC_DECL_CNFERR; /* configuration error */
+ smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
+ if (!ini->ism_dev)
+ return SMC_CLC_DECL_NOSMCDDEV;
return 0;
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
- struct smcd_dev *ismdev,
- unsigned short vlan_id)
+ struct smc_init_info *ini)
{
- if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
- return SMC_CLC_DECL_CNFERR;
+ if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
+ return SMC_CLC_DECL_ISMVLANERR;
return 0;
}
@@ -538,12 +561,11 @@ static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
* used, the VLAN ID will be registered again during the connection setup.
*/
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
- struct smcd_dev *ismdev,
- unsigned short vlan_id)
+ struct smc_init_info *ini)
{
if (!is_smcd)
return 0;
- if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
+ if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
@@ -551,13 +573,12 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
struct smc_clc_msg_accept_confirm *aclc,
- struct smc_ib_device *ibdev, u8 ibport,
- u8 gid[], struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
int rc = 0;
/* do inband token exchange */
- rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
+ rc = smc_clc_send_proposal(smc, smc_type, ini);
if (rc)
return rc;
/* receive SMC Accept CLC message */
@@ -568,23 +589,19 @@ static int smc_connect_clc(struct smc_sock *smc, int smc_type,
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
- struct smc_ib_device *ibdev, u8 ibport)
+ struct smc_init_info *ini)
{
- int local_contact = SMC_FIRST_CONTACT;
struct smc_link *link;
int reason_code = 0;
+ ini->is_smcd = false;
+ ini->ib_lcl = &aclc->lcl;
+ ini->ib_clcqpn = ntoh24(aclc->qpn);
+ ini->srv_first_contact = aclc->hdr.flag;
+
mutex_lock(&smc_client_lgr_pending);
- local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
- ibport, ntoh24(aclc->qpn), &aclc->lcl,
- NULL, 0);
- if (local_contact < 0) {
- if (local_contact == -ENOMEM)
- reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
- else if (local_contact == -ENOLINK)
- reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
- else
- reason_code = SMC_CLC_DECL_INTERR; /* other error */
+ reason_code = smc_conn_create(smc, ini);
+ if (reason_code) {
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
@@ -594,45 +611,48 @@ static int smc_connect_rdma(struct smc_sock *smc,
/* create send buffer and rmb */
if (smc_buf_create(smc, false))
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+ return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+ ini->cln_first_contact);
- if (local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, aclc);
if (smc_rmb_rtoken_handling(&smc->conn, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
- local_contact);
+ ini->cln_first_contact);
smc_close_init(smc);
smc_rx_init(smc);
- if (local_contact == SMC_FIRST_CONTACT) {
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (smc_ib_ready_link(link))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
- local_contact);
+ ini->cln_first_contact);
} else {
if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
- local_contact);
+ ini->cln_first_contact);
}
smc_rmb_sync_sg_for_device(&smc->conn);
reason_code = smc_clc_send_confirm(smc);
if (reason_code)
- return smc_connect_abort(smc, reason_code, local_contact);
+ return smc_connect_abort(smc, reason_code,
+ ini->cln_first_contact);
smc_tx_init(smc);
- if (local_contact == SMC_FIRST_CONTACT) {
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
reason_code = smc_clnt_conf_first_link(smc);
if (reason_code)
return smc_connect_abort(smc, reason_code,
- local_contact);
+ ini->cln_first_contact);
}
mutex_unlock(&smc_client_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
@@ -642,23 +662,26 @@ static int smc_connect_rdma(struct smc_sock *smc,
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
- struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
- int local_contact = SMC_FIRST_CONTACT;
int rc = 0;
+ ini->is_smcd = true;
+ ini->ism_gid = aclc->gid;
+ ini->srv_first_contact = aclc->hdr.flag;
+
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
- NULL, ismdev, aclc->gid);
- if (local_contact < 0) {
+ rc = smc_conn_create(smc, ini);
+ if (rc) {
mutex_unlock(&smc_server_lgr_pending);
- return SMC_CLC_DECL_MEM;
+ return rc;
}
/* Create send and receive buffers */
if (smc_buf_create(smc, true))
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+ return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+ ini->cln_first_contact);
smc_conn_save_peer_info(smc, aclc);
smc_close_init(smc);
@@ -667,10 +690,11 @@ static int smc_connect_ism(struct smc_sock *smc,
rc = smc_clc_send_confirm(smc);
if (rc)
- return smc_connect_abort(smc, rc, local_contact);
+ return smc_connect_abort(smc, rc, ini->cln_first_contact);
mutex_unlock(&smc_server_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
@@ -682,15 +706,9 @@ static int __smc_connect(struct smc_sock *smc)
{
bool ism_supported = false, rdma_supported = false;
struct smc_clc_msg_accept_confirm aclc;
- struct smc_ib_device *ibdev;
- struct smcd_dev *ismdev;
- u8 gid[SMC_GID_SIZE];
- unsigned short vlan;
+ struct smc_init_info ini = {0};
int smc_type;
int rc = 0;
- u8 ibport;
-
- sock_hold(&smc->sk); /* sock put in passive closing */
if (smc->use_fallback)
return smc_connect_fallback(smc, smc->fallback_rsn);
@@ -703,20 +721,21 @@ static int __smc_connect(struct smc_sock *smc)
if (using_ipsec(smc))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
- /* check for VLAN ID */
- if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
+ return smc_connect_decline_fallback(smc,
+ SMC_CLC_DECL_GETVLANERR);
/* check if there is an ism device available */
- if (!smc_check_ism(smc, &ismdev) &&
- !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+ if (!smc_find_ism_device(smc, &ini) &&
+ !smc_connect_ism_vlan_setup(smc, &ini)) {
/* ISM is supported for this connection */
ism_supported = true;
smc_type = SMC_TYPE_D;
}
/* check if there is a rdma device available */
- if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
+ if (!smc_find_rdma_device(smc, &ini)) {
/* RDMA is supported for this connection */
rdma_supported = true;
if (ism_supported)
@@ -730,25 +749,25 @@ static int __smc_connect(struct smc_sock *smc)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
/* perform CLC handshake */
- rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
+ rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
/* depending on previous steps, connect using rdma or ism */
if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
- rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+ rc = smc_connect_rdma(smc, &aclc, &ini);
else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
- rc = smc_connect_ism(smc, &aclc, ismdev);
+ rc = smc_connect_ism(smc, &aclc, &ini);
else
rc = SMC_CLC_DECL_MODEUNSUPP;
if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return 0;
}
@@ -756,17 +775,31 @@ static void smc_connect_work(struct work_struct *work)
{
struct smc_sock *smc = container_of(work, struct smc_sock,
connect_work);
- int rc;
+ long timeo = smc->sk.sk_sndtimeo;
+ int rc = 0;
- lock_sock(&smc->sk);
- rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
- smc->connect_info->alen, smc->connect_info->flags);
+ if (!timeo)
+ timeo = MAX_SCHEDULE_TIMEOUT;
+ lock_sock(smc->clcsock->sk);
if (smc->clcsock->sk->sk_err) {
smc->sk.sk_err = smc->clcsock->sk->sk_err;
- goto out;
- }
- if (rc < 0) {
- smc->sk.sk_err = -rc;
+ } else if ((1 << smc->clcsock->sk->sk_state) &
+ (TCPF_SYN_SENT | TCP_SYN_RECV)) {
+ rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
+ if ((rc == -EPIPE) &&
+ ((1 << smc->clcsock->sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
+ rc = 0;
+ }
+ release_sock(smc->clcsock->sk);
+ lock_sock(&smc->sk);
+ if (rc != 0 || smc->sk.sk_err) {
+ smc->sk.sk_state = SMC_CLOSED;
+ if (rc == -EPIPE || rc == -EAGAIN)
+ smc->sk.sk_err = EPIPE;
+ else if (signal_pending(current))
+ smc->sk.sk_err = -sock_intr_errno(timeo);
+ sock_put(&smc->sk); /* passive closing */
goto out;
}
@@ -775,12 +808,14 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_err = -rc;
out:
- if (smc->sk.sk_err)
- smc->sk.sk_state_change(&smc->sk);
- else
- smc->sk.sk_write_space(&smc->sk);
- kfree(smc->connect_info);
- smc->connect_info = NULL;
+ if (!sock_flag(&smc->sk, SOCK_DEAD)) {
+ if (smc->sk.sk_err) {
+ smc->sk.sk_state_change(&smc->sk);
+ } else { /* allow polling before and after fallback decision */
+ smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
+ smc->sk.sk_write_space(&smc->sk);
+ }
+ }
release_sock(&smc->sk);
}
@@ -813,26 +848,20 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
smc_copy_sock_settings_to_clc(smc);
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
+ if (smc->connect_nonblock) {
+ rc = -EALREADY;
+ goto out;
+ }
+ rc = kernel_connect(smc->clcsock, addr, alen, flags);
+ if (rc && rc != -EINPROGRESS)
+ goto out;
+
+ sock_hold(&smc->sk); /* sock put in passive closing */
if (flags & O_NONBLOCK) {
- if (smc->connect_info) {
- rc = -EALREADY;
- goto out;
- }
- smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
- if (!smc->connect_info) {
- rc = -ENOMEM;
- goto out;
- }
- smc->connect_info->alen = alen;
- smc->connect_info->flags = flags ^ O_NONBLOCK;
- memcpy(&smc->connect_info->addr, addr, alen);
- schedule_work(&smc->connect_work);
+ if (schedule_work(&smc->connect_work))
+ smc->connect_nonblock = 1;
rc = -EINPROGRESS;
} else {
- rc = kernel_connect(smc->clcsock, addr, alen, flags);
- if (rc)
- goto out;
-
rc = __smc_connect(smc);
if (rc < 0)
goto out;
@@ -872,11 +901,11 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
if (rc < 0)
lsk->sk_err = -rc;
if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
+ new_sk->sk_prot->unhash(new_sk);
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- new_sk->sk_prot->unhash(new_sk);
sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
@@ -927,16 +956,21 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ new_sk->sk_prot->unhash(new_sk);
if (isk->clcsock) {
sock_release(isk->clcsock);
isk->clcsock = NULL;
}
- new_sk->sk_prot->unhash(new_sk);
sock_put(new_sk); /* final */
continue;
}
- if (new_sock)
+ if (new_sock) {
sock_graft(new_sk, new_sock);
+ if (isk->use_fallback) {
+ smc_sk(new_sk)->clcsock->file = new_sock->file;
+ isk->clcsock->file->private_data = isk->clcsock;
+ }
+ }
return new_sk;
}
return NULL;
@@ -947,31 +981,14 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (!smc->use_fallback) {
- smc_close_active(smc);
- sock_set_flag(sk, SOCK_DEAD);
- sk->sk_shutdown |= SHUTDOWN_MASK;
- }
- if (smc->clcsock) {
- struct socket *tcp;
-
- tcp = smc->clcsock;
- smc->clcsock = NULL;
- sock_release(tcp);
- }
- if (smc->use_fallback) {
- sock_put(sk); /* passive closing */
- sk->sk_state = SMC_CLOSED;
- } else {
- if (sk->sk_state == SMC_CLOSED)
- smc_conn_free(&smc->conn);
- }
+ __smc_release(smc);
release_sock(sk);
- sk->sk_prot->unhash(sk);
+ sock_put(sk); /* sock_hold above */
sock_put(sk); /* final sock_put */
}
@@ -1037,13 +1054,13 @@ static void smc_listen_out(struct smc_sock *new_smc)
struct smc_sock *lsmc = new_smc->listen_smc;
struct sock *newsmcsk = &new_smc->sk;
- lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
+ release_sock(&lsmc->sk);
} else { /* no longer listening */
smc_close_non_accepted(newsmcsk);
}
- release_sock(&lsmc->sk);
/* Wake up accept */
lsmc->sk.sk_data_ready(&lsmc->sk);
@@ -1087,7 +1104,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
return;
}
smc_conn_free(&new_smc->conn);
- new_smc->use_fallback = true;
+ smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code) < 0) {
@@ -1099,7 +1116,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
}
/* listen worker: check prefixes */
-static int smc_listen_rdma_check(struct smc_sock *new_smc,
+static int smc_listen_prfx_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc)
{
struct smc_clc_msg_proposal_prefix *pclc_prfx;
@@ -1107,25 +1124,21 @@ static int smc_listen_rdma_check(struct smc_sock *new_smc,
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
- return SMC_CLC_DECL_CNFERR;
+ return SMC_CLC_DECL_DIFFPREFIX;
return 0;
}
/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
- struct smc_clc_msg_proposal *pclc,
- struct smc_ib_device *ibdev, u8 ibport,
- int *local_contact)
+ struct smc_init_info *ini)
{
+ int rc;
+
/* allocate connection / link group */
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
- &pclc->lcl, NULL, 0);
- if (*local_contact < 0) {
- if (*local_contact == -ENOMEM)
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
- return SMC_CLC_DECL_INTERR; /* other error */
- }
+ rc = smc_conn_create(new_smc, ini);
+ if (rc)
+ return rc;
/* create send buffer and rmb */
if (smc_buf_create(new_smc, false))
@@ -1137,33 +1150,30 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
- struct smcd_dev *ismdev,
- int *local_contact)
+ struct smc_init_info *ini)
{
struct smc_clc_msg_smcd *pclc_smcd;
+ int rc;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
- ismdev, pclc_smcd->gid);
- if (*local_contact < 0) {
- if (*local_contact == -ENOMEM)
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
- return SMC_CLC_DECL_INTERR; /* other error */
- }
+ ini->ism_gid = pclc_smcd->gid;
+ rc = smc_conn_create(new_smc, ini);
+ if (rc)
+ return rc;
/* Check if peer can be reached via ISM device */
if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
- if (*local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
- return SMC_CLC_DECL_CNFERR;
+ return SMC_CLC_DECL_SMCDNOTALK;
}
/* Create send and receive buffers */
if (smc_buf_create(new_smc, true)) {
- if (*local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_MEM;
@@ -1227,15 +1237,13 @@ static void smc_listen_work(struct work_struct *work)
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm cclc;
struct smc_clc_msg_proposal *pclc;
- struct smc_ib_device *ibdev;
+ struct smc_init_info ini = {0};
bool ism_supported = false;
- struct smcd_dev *ismdev;
u8 buf[SMC_CLC_MAX_LEN];
- int local_contact = 0;
- unsigned short vlan;
- int reason_code = 0;
int rc = 0;
- u8 ibport;
+
+ if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
+ return smc_listen_out_err(new_smc);
if (new_smc->use_fallback) {
smc_listen_out_connected(new_smc);
@@ -1244,7 +1252,7 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- new_smc->use_fallback = true;
+ smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
smc_listen_out_connected(new_smc);
return;
@@ -1254,17 +1262,26 @@ static void smc_listen_work(struct work_struct *work)
* wait for and receive SMC Proposal CLC message
*/
pclc = (struct smc_clc_msg_proposal *)&buf;
- reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
- SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
- if (reason_code) {
- smc_listen_decline(new_smc, reason_code, 0);
- return;
- }
+ rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
+ SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
+ if (rc)
+ goto out_decl;
/* IPSec connections opt out of SMC-R optimizations */
if (using_ipsec(new_smc)) {
- smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
- return;
+ rc = SMC_CLC_DECL_IPSEC;
+ goto out_decl;
+ }
+
+ /* check for matching IP prefix and subnet length */
+ rc = smc_listen_prfx_check(new_smc, pclc);
+ if (rc)
+ goto out_decl;
+
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
+ rc = SMC_CLC_DECL_GETVLANERR;
+ goto out_decl;
}
mutex_lock(&smc_server_lgr_pending);
@@ -1273,59 +1290,73 @@ static void smc_listen_work(struct work_struct *work)
smc_tx_init(new_smc);
/* check if ISM is available */
- if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
- !smc_check_ism(new_smc, &ismdev) &&
- !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
- ism_supported = true;
+ if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
+ ini.is_smcd = true; /* prepare ISM check */
+ rc = smc_find_ism_device(new_smc, &ini);
+ if (!rc)
+ rc = smc_listen_ism_init(new_smc, pclc, &ini);
+ if (!rc)
+ ism_supported = true;
+ else if (pclc->hdr.path == SMC_TYPE_D)
+ goto out_unlock; /* skip RDMA and decline */
}
/* check if RDMA is available */
- if (!ism_supported &&
- ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
- smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
- smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
- smc_listen_rdma_check(new_smc, pclc) ||
- smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
- &local_contact) ||
- smc_listen_rdma_reg(new_smc, local_contact))) {
- /* SMC not supported, decline */
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
- local_contact);
- return;
+ if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
+ /* prepare RDMA check */
+ ini.is_smcd = false;
+ ini.ism_dev = NULL;
+ ini.ib_lcl = &pclc->lcl;
+ rc = smc_find_rdma_device(new_smc, &ini);
+ if (rc) {
+ /* no RDMA device found */
+ if (pclc->hdr.path == SMC_TYPE_B)
+ /* neither ISM nor RDMA device found */
+ rc = SMC_CLC_DECL_NOSMCDEV;
+ goto out_unlock;
+ }
+ rc = smc_listen_rdma_init(new_smc, &ini);
+ if (rc)
+ goto out_unlock;
+ rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
+ if (rc)
+ goto out_unlock;
}
/* send SMC Accept CLC message */
- rc = smc_clc_send_accept(new_smc, local_contact);
- if (rc) {
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, rc, local_contact);
- return;
- }
+ rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
+ if (rc)
+ goto out_unlock;
/* SMC-D does not need this lock any more */
if (ism_supported)
mutex_unlock(&smc_server_lgr_pending);
/* receive SMC Confirm CLC message */
- reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
- SMC_CLC_CONFIRM, CLC_WAIT_TIME);
- if (reason_code) {
+ rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
+ SMC_CLC_CONFIRM, CLC_WAIT_TIME);
+ if (rc) {
if (!ism_supported)
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, reason_code, local_contact);
- return;
+ goto out_unlock;
+ goto out_decl;
}
/* finish worker */
if (!ism_supported) {
- rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+ rc = smc_listen_rdma_finish(new_smc, &cclc,
+ ini.cln_first_contact);
mutex_unlock(&smc_server_lgr_pending);
if (rc)
return;
}
smc_conn_save_peer_info(new_smc, &cclc);
smc_listen_out_connected(new_smc);
+ return;
+
+out_unlock:
+ mutex_unlock(&smc_server_lgr_pending);
+out_decl:
+ smc_listen_decline(new_smc, rc, ini.cln_first_contact);
}
static void smc_tcp_listen_work(struct work_struct *work)
@@ -1372,7 +1403,8 @@ static int smc_listen(struct socket *sock, int backlog)
lock_sock(sk);
rc = -EINVAL;
- if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
+ if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
+ smc->connect_nonblock)
goto out;
rc = 0;
@@ -1500,8 +1532,8 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
if (msg->msg_flags & MSG_FASTOPEN) {
- if (sk->sk_state == SMC_INIT) {
- smc->use_fallback = true;
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
rc = -EINVAL;
@@ -1571,8 +1603,8 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- __poll_t mask = 0;
struct smc_sock *smc;
+ __poll_t mask = 0;
if (!sk)
return EPOLLNVAL;
@@ -1582,8 +1614,6 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
/* delegate to CLC child sock */
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
sk->sk_err = smc->clcsock->sk->sk_err;
- if (sk->sk_err)
- mask |= EPOLLERR;
} else {
if (sk->sk_state != SMC_CLOSED)
sock_poll_wait(file, sock, wait);
@@ -1594,9 +1624,14 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
mask |= EPOLLHUP;
if (sk->sk_state == SMC_LISTEN) {
/* woken up by sk_data_ready in smc_listen_work() */
- mask = smc_accept_poll(sk);
+ mask |= smc_accept_poll(sk);
+ } else if (smc->use_fallback) { /* as result of connect_work()*/
+ mask |= smc->clcsock->ops->poll(file, smc->clcsock,
+ wait);
+ sk->sk_err = smc->clcsock->sk->sk_err;
} else {
- if (atomic_read(&smc->conn.sndbuf_space) ||
+ if ((sk->sk_state != SMC_INIT &&
+ atomic_read(&smc->conn.sndbuf_space)) ||
sk->sk_shutdown & SEND_SHUTDOWN) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else {
@@ -1702,8 +1737,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
- if (sk->sk_state == SMC_INIT) {
- smc->use_fallback = true;
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
if (!smc->use_fallback)
@@ -1711,14 +1746,18 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
}
break;
case TCP_NODELAY:
- if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+ if (sk->sk_state != SMC_INIT &&
+ sk->sk_state != SMC_LISTEN &&
+ sk->sk_state != SMC_CLOSED) {
if (val && !smc->use_fallback)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
}
break;
case TCP_CORK:
- if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+ if (sk->sk_state != SMC_INIT &&
+ sk->sk_state != SMC_LISTEN &&
+ sk->sk_state != SMC_CLOSED) {
if (!val && !smc->use_fallback)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
@@ -1999,24 +2038,30 @@ static int __init smc_init(void)
rc = smc_pnet_init();
if (rc)
- return rc;
+ goto out_pernet_subsys;
+
+ rc = smc_core_init();
+ if (rc) {
+ pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
+ goto out_pnet;
+ }
rc = smc_llc_init();
if (rc) {
pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
- goto out_pnet;
+ goto out_core;
}
rc = smc_cdc_init();
if (rc) {
pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
- goto out_pnet;
+ goto out_core;
}
rc = proto_register(&smc_proto, 1);
if (rc) {
pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
- goto out_pnet;
+ goto out_core;
}
rc = proto_register(&smc_proto6, 1);
@@ -2048,21 +2093,27 @@ out_proto6:
proto_unregister(&smc_proto6);
out_proto:
proto_unregister(&smc_proto);
+out_core:
+ smc_core_exit();
out_pnet:
smc_pnet_exit();
+out_pernet_subsys:
+ unregister_pernet_subsys(&smc_net_ops);
+
return rc;
}
static void __exit smc_exit(void)
{
- smc_core_exit();
static_branch_disable(&tcp_have_smc);
- smc_ib_unregister_client();
sock_unregister(PF_SMC);
+ smc_core_exit();
+ smc_ib_unregister_client();
proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
unregister_pernet_subsys(&smc_net_ops);
+ rcu_barrier();
}
module_init(smc_init);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index adbdf195eb08..be11ba41190f 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -188,12 +188,7 @@ struct smc_connection {
* 0 for SMC-R, 32 for SMC-D
*/
u64 peer_token; /* SMC-D token of peer */
-};
-
-struct smc_connect_info {
- int flags;
- int alen;
- struct sockaddr addr;
+ u8 killed : 1; /* abnormal termination */
};
struct smc_sock { /* smc sock container */
@@ -201,7 +196,6 @@ struct smc_sock { /* smc sock container */
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
- struct smc_connect_info *connect_info; /* connect address & flags */
struct work_struct connect_work; /* handle non-blocking connect*/
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
@@ -219,6 +213,10 @@ struct smc_sock { /* smc sock container */
* started, waiting for unsent
* data to be sent
*/
+ u8 connect_nonblock : 1;
+ /* non-blocking connect in
+ * flight
+ */
struct mutex clcsock_release_lock;
/* protects clcsock of a listen
* socket
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index d0b0f4c865b4..164f1584861b 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -63,7 +63,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
wr_rdma_buf,
(struct smc_wr_tx_pend_priv **)pend);
- if (!conn->alert_token_local)
+ if (conn->killed)
/* abnormal termination */
rc = -EPIPE;
return rc;
@@ -131,6 +131,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
int rc;
+ if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
+ return -EPIPE;
+
if (conn->lgr->is_smcd) {
spin_lock_bh(&conn->send_lock);
rc = smcd_cdc_msg_send(conn);
@@ -328,7 +331,7 @@ static void smcd_cdc_rx_tsklet(unsigned long data)
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
- if (!conn)
+ if (!conn || conn->killed)
return;
data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index d53fd588d1f5..0879f7bed967 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -97,17 +97,19 @@ static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
struct smc_clc_msg_proposal_prefix *prop)
{
struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+ const struct in_ifaddr *ifa;
if (!in_dev)
return -ENODEV;
- for_ifa(in_dev) {
+
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (!inet_ifa_match(ipv4, ifa))
continue;
prop->prefix_len = inet_mask_len(ifa->ifa_mask);
prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
return 0;
- } endfor_ifa(in_dev);
+ }
return -ENOENT;
}
@@ -190,14 +192,15 @@ static int smc_clc_prfx_match4_rcu(struct net_device *dev,
struct smc_clc_msg_proposal_prefix *prop)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ const struct in_ifaddr *ifa;
if (!in_dev)
return -ENODEV;
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
inet_ifa_match(prop->outgoing_subnet, ifa))
return 0;
- } endfor_ifa(in_dev);
+ }
return -ENOENT;
}
@@ -346,7 +349,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
smc->conn.lgr->sync_err = 1;
- smc_lgr_terminate(smc->conn.lgr);
+ smc_lgr_terminate(smc->conn.lgr, true);
}
}
@@ -385,8 +388,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
/* send CLC PROPOSAL message across internal TCP socket */
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_ib_device *ibdev, u8 ibport, u8 gid[],
- struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
@@ -416,8 +418,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
/* add SMC-R specifics */
memcpy(pclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
- memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE);
- memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
+ memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE);
+ memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
+ ETH_ALEN);
pclc.iparea_offset = htons(0);
}
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
@@ -425,7 +428,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
memset(&pclc_smcd, 0, sizeof(pclc_smcd));
plen += sizeof(pclc_smcd);
pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
- pclc_smcd.gid = ismdev->local_gid;
+ pclc_smcd.gid = ini->ism_dev->local_gid;
}
pclc.hdr.length = htons(plen);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 24658e8c0de4..ca209272e5fa 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -34,16 +34,22 @@
#define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */
#define SMC_CLC_DECL_PEERNOSMC 0x03010000 /* peer did not indicate SMC */
#define SMC_CLC_DECL_IPSEC 0x03020000 /* IPsec usage */
-#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found */
+#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */
+#define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */
+#define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */
+#define SMC_CLC_DECL_SMCDNOTALK 0x03030003 /* SMC-D dev can't talk to peer */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
+#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */
+#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
+#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
-#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
-#define SMC_CLC_DECL_ERR_RTOK 0x99990001 /* rtoken handling failed */
-#define SMC_CLC_DECL_ERR_RDYLNK 0x99990002 /* ib ready link failed */
-#define SMC_CLC_DECL_ERR_REGRMB 0x99990003 /* reg rmb failed */
+#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
+#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */
+#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
+#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
@@ -179,6 +185,7 @@ smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
}
struct smcd_dev;
+struct smc_init_info;
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
@@ -186,8 +193,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type, unsigned long timeout);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
- struct smcd_dev *ismdev);
+ struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 2ad37e998509..290270c821ca 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -13,13 +13,28 @@
#include <linux/sched/signal.h>
#include <net/sock.h>
+#include <net/tcp.h>
#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
+/* release the clcsock that is assigned to the smc_sock */
+void smc_clcsock_release(struct smc_sock *smc)
+{
+ struct socket *tcp;
+
+ if (smc->listen_smc && current_work() != &smc->smc_listen_work)
+ cancel_work_sync(&smc->smc_listen_work);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (smc->clcsock) {
+ tcp = smc->clcsock;
+ smc->clcsock = NULL;
+ sock_release(tcp);
+ }
+ mutex_unlock(&smc->clcsock_release_lock);
+}
static void smc_close_cleanup_listen(struct sock *parent)
{
@@ -49,8 +64,9 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
rc = sk_wait_event(sk, &timeout,
!smc_tx_prepared_sends(&smc->conn) ||
- (sk->sk_err == ECONNABORTED) ||
- (sk->sk_err == ECONNRESET),
+ sk->sk_err == ECONNABORTED ||
+ sk->sk_err == ECONNRESET ||
+ smc->conn.killed,
&wait);
if (rc)
break;
@@ -79,68 +95,73 @@ static int smc_close_final(struct smc_connection *conn)
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
else
conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
+ if (conn->killed)
+ return -EPIPE;
return smc_cdc_get_slot_and_msg_send(conn);
}
-static int smc_close_abort(struct smc_connection *conn)
+int smc_close_abort(struct smc_connection *conn)
{
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
return smc_cdc_get_slot_and_msg_send(conn);
}
+static void smc_close_cancel_work(struct smc_sock *smc)
+{
+ struct sock *sk = &smc->sk;
+
+ release_sock(sk);
+ cancel_work_sync(&smc->conn.close_work);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
+ sk->sk_state = SMC_CLOSED;
+}
+
/* terminate smc socket abnormally - active abort
* link group is terminated, i.e. RDMA communication no longer possible
*/
-static void smc_close_active_abort(struct smc_sock *smc)
+void smc_close_active_abort(struct smc_sock *smc)
{
struct sock *sk = &smc->sk;
-
- struct smc_cdc_conn_state_flags *txflags =
- &smc->conn.local_tx_ctrl.conn_state_flags;
+ bool release_clcsock = false;
if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
sk->sk_err = ECONNABORTED;
- if (smc->clcsock && smc->clcsock->sk) {
- smc->clcsock->sk->sk_err = ECONNABORTED;
- smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
- }
+ if (smc->clcsock && smc->clcsock->sk)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
}
switch (sk->sk_state) {
case SMC_ACTIVE:
sk->sk_state = SMC_PEERABORTWAIT;
- release_sock(sk);
- cancel_delayed_work_sync(&smc->conn.tx_work);
- lock_sock(sk);
+ smc_close_cancel_work(smc);
+ sk->sk_state = SMC_CLOSED;
sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- if (!smc_cdc_rxed_any_close(&smc->conn))
- sk->sk_state = SMC_PEERABORTWAIT;
- else
- sk->sk_state = SMC_CLOSED;
- release_sock(sk);
- cancel_delayed_work_sync(&smc->conn.tx_work);
- lock_sock(sk);
+ smc_close_cancel_work(smc);
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* postponed passive closing */
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
- if (!txflags->peer_conn_closed) {
- /* just SHUTDOWN_SEND done */
- sk->sk_state = SMC_PEERABORTWAIT;
- } else {
- sk->sk_state = SMC_CLOSED;
- }
+ case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_PEERABORTWAIT;
+ smc_close_cancel_work(smc);
+ sk->sk_state = SMC_CLOSED;
+ smc_conn_free(&smc->conn);
+ release_clcsock = true;
sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
+ sk->sk_state = SMC_PEERABORTWAIT;
+ smc_close_cancel_work(smc);
sk->sk_state = SMC_CLOSED;
- break;
- case SMC_PEERFINCLOSEWAIT:
- sock_put(sk); /* passive closing */
+ smc_conn_free(&smc->conn);
+ release_clcsock = true;
break;
case SMC_INIT:
case SMC_PEERABORTWAIT:
@@ -150,6 +171,12 @@ static void smc_close_active_abort(struct smc_sock *smc)
sock_set_flag(sk, SOCK_DEAD);
sk->sk_state_change(sk);
+
+ if (release_clcsock) {
+ release_sock(sk);
+ smc_clcsock_release(smc);
+ lock_sock(sk);
+ }
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -199,8 +226,6 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -213,8 +238,6 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
}
sk->sk_state = SMC_CLOSED;
break;
@@ -230,8 +253,6 @@ again:
goto again;
/* confirm close from peer */
rc = smc_close_final(conn);
- if (rc)
- break;
if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
@@ -247,8 +268,6 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -256,10 +275,12 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- smc_close_abort(conn);
+ rc = smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERABORTWAIT:
+ sk->sk_state = SMC_CLOSED;
+ break;
case SMC_CLOSED:
/* nothing to do, add tracing in future patch */
break;
@@ -321,18 +342,13 @@ static void smc_close_passive_work(struct work_struct *work)
close_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smc_cdc_conn_state_flags *rxflags;
+ bool release_clcsock = false;
struct sock *sk = &smc->sk;
int old_state;
lock_sock(sk);
old_state = sk->sk_state;
- if (!conn->alert_token_local) {
- /* abnormal termination */
- smc_close_active_abort(smc);
- goto wakeup;
- }
-
rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
/* peer has not received all data */
@@ -400,13 +416,13 @@ wakeup:
if ((sk->sk_state == SMC_CLOSED) &&
(sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
smc_conn_free(conn);
- if (smc->clcsock) {
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- }
+ if (smc->clcsock)
+ release_clcsock = true;
}
}
release_sock(sk);
+ if (release_clcsock)
+ smc_clcsock_release(smc);
sock_put(sk); /* sock_hold done by schedulers of close_work */
}
@@ -434,8 +450,6 @@ again:
goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (rc)
- break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
@@ -449,8 +463,6 @@ again:
goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
- if (rc)
- break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 19eb6a211c23..634fea2b7c95 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -23,5 +23,8 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
+void smc_clcsock_release(struct smc_sock *smc);
+int smc_close_abort(struct smc_connection *conn);
+void smc_close_active_abort(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 53a17cfa61af..bb92c7c6214c 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -13,6 +13,8 @@
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
@@ -39,23 +41,46 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
.num = 0,
};
+static atomic_t lgr_cnt; /* number of existing link groups */
+static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
+
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
+/* return head of link group list and its lock for a given link group */
+static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
+ spinlock_t **lgr_lock)
+{
+ if (lgr->is_smcd) {
+ *lgr_lock = &lgr->smcd->lgr_lock;
+ return &lgr->smcd->lgr_list;
+ }
+
+ *lgr_lock = &smc_lgr_list.lock;
+ return &smc_lgr_list.list;
+}
+
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
/* client link group creation always follows the server link group
* creation. For client use a somewhat higher removal delay time,
* otherwise there is a risk of out-of-sync link groups.
*/
- mod_delayed_work(system_wq, &lgr->free_work,
- (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
- SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
+ if (!lgr->freeing && !lgr->freefast) {
+ mod_delayed_work(system_wq, &lgr->free_work,
+ (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
+ SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
+ }
}
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
- mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
+ if (!lgr->freeing && !lgr->freefast) {
+ lgr->freefast = 1;
+ mod_delayed_work(system_wq, &lgr->free_work,
+ SMC_LGR_FREE_DELAY_FAST);
+ }
}
/* Register connection's alert token in our lookup structure.
@@ -134,16 +159,17 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
+ conn->lgr = NULL;
}
/* Send delete link, either as client to request the initiation
* of the DELETE LINK sequence from server; or as server to
* initiate the delete processing. See smc_llc_rx_delete_link().
*/
-static int smc_link_send_delete(struct smc_link *lnk)
+static int smc_link_send_delete(struct smc_link *lnk, bool orderly)
{
if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
+ !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
smc_llc_link_deleting(lnk);
return 0;
}
@@ -157,69 +183,84 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group *lgr = container_of(to_delayed_work(work),
struct smc_link_group,
free_work);
+ spinlock_t *lgr_lock;
+ struct smc_link *lnk;
bool conns;
- spin_lock_bh(&smc_lgr_list.lock);
+ smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ if (lgr->freeing) {
+ spin_unlock_bh(lgr_lock);
+ return;
+ }
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
if (!conns) { /* number of lgr connections is no longer zero */
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(lgr_lock);
return;
}
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list); /* remove from smc_lgr_list */
- spin_unlock_bh(&smc_lgr_list.lock);
+ list_del_init(&lgr->list); /* remove from smc_lgr_list */
+ lnk = &lgr->lnk[SMC_SINGLE_LINK];
if (!lgr->is_smcd && !lgr->terminating) {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
-
/* try to send del link msg, on error free lgr immediately */
if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_link_send_delete(lnk)) {
+ !smc_link_send_delete(lnk, true)) {
/* reschedule in case we never receive a response */
smc_lgr_schedule_free_work(lgr);
+ spin_unlock_bh(lgr_lock);
return;
}
}
+ lgr->freeing = 1; /* this instance does the freeing, no new schedule */
+ spin_unlock_bh(lgr_lock);
+ cancel_delayed_work(&lgr->free_work);
+
+ if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
+ smc_llc_link_inactive(lnk);
+ if (lgr->is_smcd && !lgr->terminating)
+ smc_ism_signal_shutdown(lgr);
+ smc_lgr_free(lgr);
+}
- if (!delayed_work_pending(&lgr->free_work)) {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+static void smc_lgr_terminate_work(struct work_struct *work)
+{
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ terminate_work);
- if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
- if (lgr->is_smcd)
- smc_ism_signal_shutdown(lgr);
- smc_lgr_free(lgr);
- }
+ smc_lgr_terminate(lgr, true);
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
- struct smc_ib_device *smcibdev, u8 ibport,
- char *peer_systemid, unsigned short vlan_id,
- struct smcd_dev *smcismdev, u64 peer_gid)
+static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
+ struct list_head *lgr_list;
struct smc_link *lnk;
+ spinlock_t *lgr_lock;
u8 rndvec[3];
int rc = 0;
int i;
- if (is_smcd && vlan_id) {
- rc = smc_ism_get_vlan(smcismdev, vlan_id);
- if (rc)
+ if (ini->is_smcd && ini->vlan_id) {
+ if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
+ rc = SMC_CLC_DECL_ISMVLANERR;
goto out;
+ }
}
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
- rc = -ENOMEM;
- goto out;
+ rc = SMC_CLC_DECL_MEM;
+ goto ism_put_vlan;
}
- lgr->is_smcd = is_smcd;
+ lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
- lgr->vlan_id = vlan_id;
+ lgr->terminating = 0;
+ lgr->freefast = 0;
+ lgr->freeing = 0;
+ lgr->vlan_id = ini->vlan_id;
rwlock_init(&lgr->sndbufs_lock);
rwlock_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
@@ -230,30 +271,42 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
+ INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
lgr->conns_all = RB_ROOT;
- if (is_smcd) {
+ if (ini->is_smcd) {
/* SMC-D specific settings */
- lgr->peer_gid = peer_gid;
- lgr->smcd = smcismdev;
+ get_device(&ini->ism_dev->dev);
+ lgr->peer_gid = ini->ism_gid;
+ lgr->smcd = ini->ism_dev;
+ lgr_list = &ini->ism_dev->lgr_list;
+ lgr_lock = &lgr->smcd->lgr_lock;
+ lgr->peer_shutdown = 0;
+ atomic_inc(&ini->ism_dev->lgr_cnt);
} else {
/* SMC-R specific settings */
+ get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
+ memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
+ SMC_SYSTEMID_LEN);
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = SMC_SINGLE_LINK;
- lnk->smcibdev = smcibdev;
- lnk->ibport = ibport;
- lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
- if (!smcibdev->initialized)
- smc_ib_setup_per_ibdev(smcibdev);
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ lgr_list = &smc_lgr_list.list;
+ lgr_lock = &smc_lgr_list.lock;
+ lnk->path_mtu =
+ ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+ if (!ini->ib_dev->initialized)
+ smc_ib_setup_per_ibdev(ini->ib_dev);
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- vlan_id, lnk->gid, &lnk->sgid_index);
+ ini->vlan_id, lnk->gid,
+ &lnk->sgid_index);
if (rc)
goto free_lgr;
rc = smc_llc_link_init(lnk);
@@ -271,11 +324,13 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
+ atomic_inc(&lgr_cnt);
+ atomic_inc(&ini->ib_dev->lnk_cnt);
}
smc->conn.lgr = lgr;
- spin_lock_bh(&smc_lgr_list.lock);
- list_add(&lgr->list, &smc_lgr_list.list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_lock_bh(lgr_lock);
+ list_add(&lgr->list, lgr_list);
+ spin_unlock_bh(lgr_lock);
return 0;
destroy_qp:
@@ -288,7 +343,16 @@ clear_llc_lnk:
smc_llc_link_clear(lnk);
free_lgr:
kfree(lgr);
+ism_put_vlan:
+ if (ini->is_smcd && ini->vlan_id)
+ smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
+ if (rc < 0) {
+ if (rc == -ENOMEM)
+ rc = SMC_CLC_DECL_MEM;
+ else
+ rc = SMC_CLC_DECL_INTERR;
+ }
return rc;
}
@@ -299,7 +363,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
conn->sndbuf_desc->used = 0;
if (conn->rmb_desc) {
if (!conn->rmb_desc->regerr) {
- if (!lgr->is_smcd) {
+ if (!lgr->is_smcd && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
smc_llc_do_delete_rkey(
&lgr->lnk[SMC_SINGLE_LINK],
@@ -325,14 +389,16 @@ void smc_conn_free(struct smc_connection *conn)
if (!lgr)
return;
if (lgr->is_smcd) {
- smc_ism_unset_conn(conn);
+ if (!list_empty(&lgr->list))
+ smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
}
- smc_lgr_unregister_conn(conn);
- smc_buf_unuse(conn, lgr); /* allow buffer reuse */
- conn->lgr = NULL;
+ if (!list_empty(&lgr->list)) {
+ smc_lgr_unregister_conn(conn);
+ smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ }
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
@@ -347,6 +413,8 @@ static void smc_link_clear(struct smc_link *lnk)
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
smc_wr_free_link_mem(lnk);
+ if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
+ wake_up(&lnk->smcibdev->lnks_deleted);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -423,24 +491,101 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
static void smc_lgr_free(struct smc_link_group *lgr)
{
smc_lgr_free_bufs(lgr);
- if (lgr->is_smcd)
- smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
- else
+ if (lgr->is_smcd) {
+ if (!lgr->terminating) {
+ smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
+ put_device(&lgr->smcd->dev);
+ }
+ if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
+ wake_up(&lgr->smcd->lgrs_deleted);
+ } else {
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+ put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
+ if (!atomic_dec_return(&lgr_cnt))
+ wake_up(&lgrs_deleted);
+ }
kfree(lgr);
}
void smc_lgr_forget(struct smc_link_group *lgr)
{
- spin_lock_bh(&smc_lgr_list.lock);
+ struct list_head *lgr_list;
+ spinlock_t *lgr_lock;
+
+ lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
/* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ if (!list_empty(lgr_list))
+ list_del_init(lgr_list);
+ spin_unlock_bh(lgr_lock);
}
-/* terminate linkgroup abnormally */
-static void __smc_lgr_terminate(struct smc_link_group *lgr)
+static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
+{
+ int i;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ struct smc_buf_desc *buf_desc;
+
+ list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
+ buf_desc->len += sizeof(struct smcd_cdc_msg);
+ smc_ism_unregister_dmb(lgr->smcd, buf_desc);
+ }
+ }
+}
+
+static void smc_sk_wake_ups(struct smc_sock *smc)
+{
+ smc->sk.sk_write_space(&smc->sk);
+ smc->sk.sk_data_ready(&smc->sk);
+ smc->sk.sk_state_change(&smc->sk);
+}
+
+/* kill a connection */
+static void smc_conn_kill(struct smc_connection *conn, bool soft)
+{
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ else
+ smc_close_abort(conn);
+ conn->killed = 1;
+ smc->sk.sk_err = ECONNABORTED;
+ smc_sk_wake_ups(smc);
+ if (conn->lgr->is_smcd) {
+ smc_ism_unset_conn(conn);
+ if (soft)
+ tasklet_kill(&conn->rx_tsklet);
+ else
+ tasklet_unlock_wait(&conn->rx_tsklet);
+ } else {
+ smc_cdc_tx_dismiss_slots(conn);
+ }
+ smc_lgr_unregister_conn(conn);
+ smc_close_active_abort(smc);
+}
+
+static void smc_lgr_cleanup(struct smc_link_group *lgr)
+{
+ if (lgr->is_smcd) {
+ smc_ism_signal_shutdown(lgr);
+ smcd_unregister_all_dmbs(lgr);
+ smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
+ put_device(&lgr->smcd->dev);
+ } else {
+ struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
+ wake_up(&lnk->wr_reg_wait);
+ if (lnk->state != SMC_LNK_INACTIVE) {
+ smc_link_send_delete(lnk, false);
+ smc_llc_link_inactive(lnk);
+ }
+ }
+}
+
+/* terminate link group */
+static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
struct smc_connection *conn;
struct smc_sock *smc;
@@ -448,93 +593,174 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
if (lgr->terminating)
return; /* lgr already terminating */
+ if (!soft)
+ cancel_delayed_work_sync(&lgr->free_work);
lgr->terminating = 1;
- if (!list_empty(&lgr->list)) /* forget lgr */
- list_del_init(&lgr->list);
if (!lgr->is_smcd)
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
- write_lock_bh(&lgr->conns_lock);
+ /* kill remaining link group connections */
+ read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
while (node) {
+ read_unlock_bh(&lgr->conns_lock);
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk); /* sock_put in close work */
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
- __smc_lgr_unregister_conn(conn);
- conn->lgr = NULL;
- write_unlock_bh(&lgr->conns_lock);
- if (!schedule_work(&conn->close_work))
- sock_put(&smc->sk);
- write_lock_bh(&lgr->conns_lock);
+ sock_hold(&smc->sk); /* sock_put below */
+ lock_sock(&smc->sk);
+ smc_conn_kill(conn, soft);
+ release_sock(&smc->sk);
+ sock_put(&smc->sk); /* sock_hold above */
+ read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
- write_unlock_bh(&lgr->conns_lock);
- if (!lgr->is_smcd)
- wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
- smc_lgr_schedule_free_work(lgr);
+ read_unlock_bh(&lgr->conns_lock);
+ smc_lgr_cleanup(lgr);
+ if (soft)
+ smc_lgr_schedule_free_work_fast(lgr);
+ else
+ smc_lgr_free(lgr);
}
-void smc_lgr_terminate(struct smc_link_group *lgr)
+/* unlink and terminate link group
+ * @soft: true if link group shutdown can take its time
+ * false if immediate link group shutdown is required
+ */
+void smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
- spin_lock_bh(&smc_lgr_list.lock);
- __smc_lgr_terminate(lgr);
- spin_unlock_bh(&smc_lgr_list.lock);
+ spinlock_t *lgr_lock;
+
+ smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ if (lgr->terminating) {
+ spin_unlock_bh(lgr_lock);
+ return; /* lgr already terminating */
+ }
+ if (!soft)
+ lgr->freeing = 1;
+ list_del_init(&lgr->list);
+ spin_unlock_bh(lgr_lock);
+ __smc_lgr_terminate(lgr, soft);
}
/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *l;
+ LIST_HEAD(lgr_free_list);
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (!lgr->is_smcd &&
lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
- lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
- __smc_lgr_terminate(lgr);
+ lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
+ list_move(&lgr->list, &lgr_free_list);
+ lgr->freeing = 1;
+ }
}
spin_unlock_bh(&smc_lgr_list.lock);
+
+ list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
+ list_del_init(&lgr->list);
+ __smc_lgr_terminate(lgr, false);
+ }
}
-/* Called when SMC-D device is terminated or peer is lost */
+/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
/* run common cleanup function and build free list */
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
- if (lgr->is_smcd && lgr->smcd == dev &&
- (!peer_gid || lgr->peer_gid == peer_gid) &&
+ spin_lock_bh(&dev->lgr_lock);
+ list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
+ if ((!peer_gid || lgr->peer_gid == peer_gid) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
- __smc_lgr_terminate(lgr);
+ if (peer_gid) /* peer triggered termination */
+ lgr->peer_shutdown = 1;
list_move(&lgr->list, &lgr_free_list);
}
}
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(&dev->lgr_lock);
/* cancel the regular free workers and actually free lgrs */
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
- cancel_delayed_work_sync(&lgr->free_work);
- if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
- smc_ism_signal_shutdown(lgr);
- smc_lgr_free(lgr);
+ schedule_work(&lgr->terminate_work);
+ }
+}
+
+/* Called when an SMCD device is removed or the smc module is unloaded */
+void smc_smcd_terminate_all(struct smcd_dev *smcd)
+{
+ struct smc_link_group *lgr, *lg;
+ LIST_HEAD(lgr_free_list);
+
+ spin_lock_bh(&smcd->lgr_lock);
+ list_splice_init(&smcd->lgr_list, &lgr_free_list);
+ list_for_each_entry(lgr, &lgr_free_list, list)
+ lgr->freeing = 1;
+ spin_unlock_bh(&smcd->lgr_lock);
+
+ list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
+ list_del_init(&lgr->list);
+ __smc_lgr_terminate(lgr, false);
+ }
+
+ if (atomic_read(&smcd->lgr_cnt))
+ wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
+}
+
+/* Called when an SMCR device is removed or the smc module is unloaded.
+ * If smcibdev is given, all SMCR link groups using this device are terminated.
+ * If smcibdev is NULL, all SMCR link groups are terminated.
+ */
+void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
+{
+ struct smc_link_group *lgr, *lg;
+ LIST_HEAD(lgr_free_list);
+
+ spin_lock_bh(&smc_lgr_list.lock);
+ if (!smcibdev) {
+ list_splice_init(&smc_lgr_list.list, &lgr_free_list);
+ list_for_each_entry(lgr, &lgr_free_list, list)
+ lgr->freeing = 1;
+ } else {
+ list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
+ if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
+ list_move(&lgr->list, &lgr_free_list);
+ lgr->freeing = 1;
+ }
+ }
+ }
+ spin_unlock_bh(&smc_lgr_list.lock);
+
+ list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
+ list_del_init(&lgr->list);
+ __smc_lgr_terminate(lgr, false);
+ }
+
+ if (smcibdev) {
+ if (atomic_read(&smcibdev->lnk_cnt))
+ wait_event(smcibdev->lnks_deleted,
+ !atomic_read(&smcibdev->lnk_cnt));
+ } else {
+ if (atomic_read(&lgr_cnt))
+ wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
}
}
/* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into
*/
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
struct net_device *ndev;
int i, nest_lvl, rc = 0;
- *vlan_id = 0;
+ ini->vlan_id = 0;
if (!dst) {
rc = -ENOTCONN;
goto out;
@@ -546,12 +772,12 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
ndev = dst->dev;
if (is_vlan_dev(ndev)) {
- *vlan_id = vlan_dev_vlan_id(ndev);
+ ini->vlan_id = vlan_dev_vlan_id(ndev);
goto out_rel;
}
rtnl_lock();
- nest_lvl = dev_get_nest_level(ndev);
+ nest_lvl = ndev->lower_level;
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
@@ -560,7 +786,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
lower = lower->next;
ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
if (is_vlan_dev(ndev)) {
- *vlan_id = vlan_dev_vlan_id(ndev);
+ ini->vlan_id = vlan_dev_vlan_id(ndev);
break;
}
}
@@ -594,39 +820,36 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
- struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
- u64 peer_gid)
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
- int local_contact = SMC_FIRST_CONTACT;
+ struct list_head *lgr_list;
struct smc_link_group *lgr;
- unsigned short vlan_id;
enum smc_lgr_role role;
+ spinlock_t *lgr_lock;
int rc = 0;
+ lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
+ lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
+ ini->cln_first_contact = SMC_FIRST_CONTACT;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
- if (rc)
- return rc;
-
- if ((role == SMC_CLNT) && srv_first_contact)
+ if (role == SMC_CLNT && ini->srv_first_contact)
/* create new link group as well */
goto create;
/* determine if an existing link group can be reused */
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry(lgr, &smc_lgr_list.list, list) {
+ spin_lock_bh(lgr_lock);
+ list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
- if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
- smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
+ if ((ini->is_smcd ?
+ smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
+ smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
- lgr->vlan_id == vlan_id &&
+ lgr->vlan_id == ini->vlan_id &&
(role == SMC_CLNT ||
lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
/* link group found */
- local_contact = SMC_REUSE_CONTACT;
+ ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
if (delayed_work_pending(&lgr->free_work))
@@ -636,29 +859,31 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
}
write_unlock_bh(&lgr->conns_lock);
}
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(lgr_lock);
- if (role == SMC_CLNT && !srv_first_contact &&
- (local_contact == SMC_FIRST_CONTACT)) {
+ if (role == SMC_CLNT && !ini->srv_first_contact &&
+ ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* Server reuses a link group, but Client wants to start
* a new one
* send out_of_sync decline, reason synchr. error
*/
- return -ENOLINK;
+ return SMC_CLC_DECL_SYNCERR;
}
create:
- if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
- lcl->id_for_peer, vlan_id, smcd, peer_gid);
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+ rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
+ lgr = conn->lgr;
+ write_lock_bh(&lgr->conns_lock);
smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ write_unlock_bh(&lgr->conns_lock);
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
- if (is_smcd) {
+ if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
smcd_cdc_rx_init(conn); /* init tasklet for this conn */
}
@@ -667,7 +892,7 @@ create:
#endif
out:
- return rc ? rc : local_contact;
+ return rc;
}
/* convert the RMB size into the compressed notation - minimum 16K.
@@ -1022,29 +1247,63 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
return 0;
}
-/* Called (from smc_exit) when module is removed */
-void smc_core_exit(void)
+static void smc_core_going_away(void)
{
- struct smc_link_group *lgr, *lg;
- LIST_HEAD(lgr_freeing_list);
+ struct smc_ib_device *smcibdev;
+ struct smcd_dev *smcd;
- spin_lock_bh(&smc_lgr_list.lock);
- if (!list_empty(&smc_lgr_list.list))
- list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
- spin_unlock_bh(&smc_lgr_list.lock);
- list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
- list_del_init(&lgr->list);
- if (!lgr->is_smcd) {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ spin_lock(&smc_ib_devices.lock);
+ list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
+ int i;
- if (lnk->state == SMC_LNK_ACTIVE)
- smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
- false);
- smc_llc_link_inactive(lnk);
- }
- cancel_delayed_work_sync(&lgr->free_work);
- if (lgr->is_smcd)
- smc_ism_signal_shutdown(lgr);
- smc_lgr_free(lgr); /* free link group */
+ for (i = 0; i < SMC_MAX_PORTS; i++)
+ set_bit(i, smcibdev->ports_going_away);
}
+ spin_unlock(&smc_ib_devices.lock);
+
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+ smcd->going_away = 1;
+ }
+ spin_unlock(&smcd_dev_list.lock);
+}
+
+/* Clean up all SMC link groups */
+static void smc_lgrs_shutdown(void)
+{
+ struct smcd_dev *smcd;
+
+ smc_core_going_away();
+
+ smc_smcr_terminate_all(NULL);
+
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd, &smcd_dev_list.list, list)
+ smc_smcd_terminate_all(smcd);
+ spin_unlock(&smcd_dev_list.lock);
+}
+
+static int smc_core_reboot_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ smc_lgrs_shutdown();
+
+ return 0;
+}
+
+static struct notifier_block smc_reboot_notifier = {
+ .notifier_call = smc_core_reboot_event,
+};
+
+int __init smc_core_init(void)
+{
+ atomic_set(&lgr_cnt, 0);
+ return register_reboot_notifier(&smc_reboot_notifier);
+}
+
+/* Called (from smc_exit) when module is removed */
+void smc_core_exit(void)
+{
+ unregister_reboot_notifier(&smc_reboot_notifier);
+ smc_lgrs_shutdown();
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 8806d2afa6ed..c472e12951d1 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -202,8 +202,11 @@ struct smc_link_group {
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
+ struct work_struct terminate_work; /* abnormal lgr termination */
u8 sync_err : 1; /* lgr no longer fits to peer */
u8 terminating : 1;/* lgr is terminating */
+ u8 freefast : 1; /* free worker scheduled fast */
+ u8 freeing : 1; /* lgr is being freed */
bool is_smcd; /* SMC-R or SMC-D */
union {
@@ -225,10 +228,30 @@ struct smc_link_group {
/* Peer GID (remote) */
struct smcd_dev *smcd;
/* ISM device for VLAN reg. */
+ u8 peer_shutdown : 1;
+ /* peer triggered shutdownn */
};
};
};
+struct smc_clc_msg_local;
+
+struct smc_init_info {
+ u8 is_smcd;
+ unsigned short vlan_id;
+ int srv_first_contact;
+ int cln_first_contact;
+ /* SMC-R */
+ struct smc_clc_msg_local *ib_lcl;
+ struct smc_ib_device *ib_dev;
+ u8 ib_gid[SMC_GID_SIZE];
+ u8 ib_port;
+ u32 ib_clcqpn;
+ /* SMC-D */
+ u64 ism_gid;
+ struct smcd_dev *ism_dev;
+};
+
/* Find the connection associated with the given alert token in the link group.
* To use rbtrees we have to implement our own search core.
* Requires @conns_lock
@@ -262,15 +285,23 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+static inline void smc_lgr_terminate_sched(struct smc_link_group *lgr)
+{
+ if (!lgr->terminating && !lgr->freeing)
+ schedule_work(&lgr->terminate_work);
+}
+
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
void smc_lgr_forget(struct smc_link_group *lgr);
-void smc_lgr_terminate(struct smc_link_group *lgr);
+void smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
unsigned short vlan);
+void smc_smcd_terminate_all(struct smcd_dev *dev);
+void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
@@ -281,15 +312,13 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
- struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
- u64 peer_gid);
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
void smcd_conn_free(struct smc_connection *conn);
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
+int smc_core_init(void);
void smc_core_exit(void);
static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 371b4cf31fcd..f38727ecf8b2 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 53f429c04843..548632621f4b 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -15,6 +15,7 @@
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/scatterlist.h>
+#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -146,18 +147,13 @@ out:
static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
const struct ib_gid_attr *attr;
- int rc = 0;
+ int rc;
attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0);
if (IS_ERR(attr))
return -ENODEV;
- if (attr->ndev)
- memcpy(smcibdev->mac[ibport - 1], attr->ndev->dev_addr,
- ETH_ALEN);
- else
- rc = -ENODEV;
-
+ rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]);
rdma_put_gid_attr(attr);
return rc;
}
@@ -185,6 +181,7 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index)
{
const struct ib_gid_attr *attr;
+ const struct net_device *ndev;
int i;
for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
@@ -192,11 +189,14 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
if (IS_ERR(attr))
continue;
- if (attr->ndev &&
+ rcu_read_lock();
+ ndev = rdma_read_gid_attr_ndev_rcu(attr);
+ if (!IS_ERR(ndev) &&
((!vlan_id && !is_vlan_dev(attr->ndev)) ||
(vlan_id && is_vlan_dev(attr->ndev) &&
vlan_dev_vlan_id(attr->ndev) == vlan_id)) &&
attr->gid_type == IB_GID_TYPE_ROCE) {
+ rcu_read_unlock();
if (gid)
memcpy(gid, &attr->gid, SMC_GID_SIZE);
if (sgid_index)
@@ -204,6 +204,7 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
rdma_put_gid_attr(attr);
return 0;
}
+ rcu_read_unlock();
rdma_put_gid_attr(attr);
}
return -ENODEV;
@@ -242,8 +243,12 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
- if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
+ set_bit(port_idx, smcibdev->ports_going_away);
smc_port_terminate(smcibdev, port_idx + 1);
+ } else {
+ clear_bit(port_idx, smcibdev->ports_going_away);
+ }
}
}
@@ -259,8 +264,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
/* terminate all ports on device */
- for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
+ for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ set_bit(port_idx, smcibdev->ports_going_away);
+ }
schedule_work(&smcibdev->port_event_work);
break;
case IB_EVENT_PORT_ERR:
@@ -269,6 +276,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
port_idx = ibevent->element.port_num - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ if (ibevent->event == IB_EVENT_PORT_ERR)
+ set_bit(port_idx, smcibdev->ports_going_away);
+ else if (ibevent->event == IB_EVENT_PORT_ACTIVE)
+ clear_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
@@ -307,6 +318,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
port_idx = ibevent->element.qp->port - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ set_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
@@ -509,9 +521,9 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
if (!smcibdev->initialized)
return;
smcibdev->initialized = 0;
- smc_wr_remove_dev(smcibdev);
ib_destroy_cq(smcibdev->roce_cq_recv);
ib_destroy_cq(smcibdev->roce_cq_send);
+ smc_wr_remove_dev(smcibdev);
}
static struct ib_client smc_ib_client;
@@ -532,7 +544,8 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
smcibdev->ibdev = ibdev;
INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
-
+ atomic_set(&smcibdev->lnk_cnt, 0);
+ init_waitqueue_head(&smcibdev->lnks_deleted);
spin_lock(&smc_ib_devices.lock);
list_add_tail(&smcibdev->list, &smc_ib_devices.list);
spin_unlock(&smc_ib_devices.lock);
@@ -554,7 +567,7 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
schedule_work(&smcibdev->port_event_work);
}
-/* callback function for ib_register_client() */
+/* callback function for ib_unregister_client() */
static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
{
struct smc_ib_device *smcibdev;
@@ -564,6 +577,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
spin_unlock(&smc_ib_devices.lock);
+ smc_smcr_terminate_all(smcibdev);
smc_ib_cleanup_per_ibdev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
kfree(smcibdev);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index da60ab9e8d70..255db87547d3 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/if_ether.h>
+#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <net/smc.h>
@@ -47,6 +48,9 @@ struct smc_ib_device { /* ib-device infos for smc */
u8 initialized : 1; /* ib dev CQ, evthdl done */
struct work_struct port_event_work;
unsigned long port_event_mask;
+ DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS);
+ atomic_t lnk_cnt; /* number of links on ibdev */
+ wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/
};
struct smc_buf_desc;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 2fff79db1a59..5c4727d5066e 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -146,6 +146,10 @@ out:
int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
{
struct smcd_dmb dmb;
+ int rc = 0;
+
+ if (!dmb_desc->dma_addr)
+ return rc;
memset(&dmb, 0, sizeof(dmb));
dmb.dmb_tok = dmb_desc->token;
@@ -153,7 +157,13 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
dmb.cpu_addr = dmb_desc->cpu_addr;
dmb.dma_addr = dmb_desc->dma_addr;
dmb.dmb_len = dmb_desc->len;
- return smcd->ops->unregister_dmb(smcd, &dmb);
+ rc = smcd->ops->unregister_dmb(smcd, &dmb);
+ if (!rc || rc == ISM_ERROR) {
+ dmb_desc->cpu_addr = NULL;
+ dmb_desc->dma_addr = 0;
+ }
+
+ return rc;
}
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
@@ -226,6 +236,9 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr)
int rc;
union smcd_sw_event_info ev_info;
+ if (lgr->peer_shutdown)
+ return 0;
+
memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
ev_info.vlan_id = lgr->vlan_id;
ev_info.code = ISM_EVENT_REQUEST;
@@ -286,9 +299,17 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
spin_lock_init(&smcd->lock);
+ spin_lock_init(&smcd->lgr_lock);
INIT_LIST_HEAD(&smcd->vlan);
+ INIT_LIST_HEAD(&smcd->lgr_list);
+ init_waitqueue_head(&smcd->lgrs_deleted);
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
+ if (!smcd->event_wq) {
+ kfree(smcd->conn);
+ kfree(smcd);
+ return NULL;
+ }
return smcd;
}
EXPORT_SYMBOL_GPL(smcd_alloc_dev);
@@ -306,11 +327,12 @@ EXPORT_SYMBOL_GPL(smcd_register_dev);
void smcd_unregister_dev(struct smcd_dev *smcd)
{
spin_lock(&smcd_dev_list.lock);
- list_del(&smcd->list);
+ list_del_init(&smcd->list);
spin_unlock(&smcd_dev_list.lock);
+ smcd->going_away = 1;
+ smc_smcd_terminate_all(smcd);
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
- smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
device_del(&smcd->dev);
}
@@ -337,6 +359,8 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
{
struct smc_ism_event_work *wrk;
+ if (smcd->going_away)
+ return;
/* copy event to event work queue, and let it be handled there */
wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
if (!wrk)
@@ -362,7 +386,7 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
spin_lock_irqsave(&smcd->lock, flags);
conn = smcd->conn[dmbno];
- if (conn)
+ if (conn && !conn->killed)
tasklet_schedule(&conn->rx_tsklet);
spin_unlock_irqrestore(&smcd->lock, flags);
}
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 4fd60c522802..a9f6431dd69a 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -475,7 +475,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
}
smc_llc_send_message(link, llc, sizeof(*llc));
- smc_lgr_schedule_free_work_fast(lgr);
+ smc_lgr_terminate_sched(lgr);
}
}
@@ -614,7 +614,7 @@ static void smc_llc_testlink_work(struct work_struct *work)
rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
SMC_LLC_WAIT_TIME);
if (rc <= 0) {
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate(smc_get_lgr(link), true);
return;
}
next_interval = link->llc_testlink_time;
@@ -656,6 +656,7 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time)
void smc_llc_link_deleting(struct smc_link *link)
{
link->state = SMC_LNK_DELETING;
+ smc_wr_wakeup_tx_wait(link);
}
/* called in tasklet context */
@@ -663,6 +664,8 @@ void smc_llc_link_inactive(struct smc_link *link)
{
link->state = SMC_LNK_INACTIVE;
cancel_delayed_work(&link->llc_testlink_wrk);
+ smc_wr_wakeup_reg_wait(link);
+ smc_wr_wakeup_tx_wait(link);
}
/* called in worker context */
@@ -695,9 +698,11 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
int smc_llc_do_delete_rkey(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
{
- int rc;
+ int rc = 0;
mutex_lock(&link->llc_delete_rkey_mutex);
+ if (link->state != SMC_LNK_ACTIVE)
+ goto out;
reinit_completion(&link->llc_delete_rkey);
rc = smc_llc_send_delete_rkey(link, rmb_desc);
if (rc)
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 8d2f6296279c..82dedf052d86 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -26,6 +26,7 @@
#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_ism.h"
+#include "smc_core.h"
#define SMC_ASCII_BLANK 32
@@ -375,8 +376,6 @@ static int smc_pnet_fill_entry(struct net *net,
return 0;
error:
- if (pnetelem->ndev)
- dev_put(pnetelem->ndev);
return rc;
}
@@ -603,35 +602,36 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- return smc_pnet_remove_by_pnetid(net, NULL);
+ smc_pnet_remove_by_pnetid(net, NULL);
+ return 0;
}
/* SMC_PNETID generic netlink operation definition */
static const struct genl_ops smc_pnet_ops[] = {
{
.cmd = SMC_PNETID_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_ADMIN_PERM,
- .policy = smc_pnet_policy,
.doit = smc_pnet_get,
.dumpit = smc_pnet_dump,
.start = smc_pnet_dump_start
},
{
.cmd = SMC_PNETID_ADD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_ADMIN_PERM,
- .policy = smc_pnet_policy,
.doit = smc_pnet_add
},
{
.cmd = SMC_PNETID_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_ADMIN_PERM,
- .policy = smc_pnet_policy,
.doit = smc_pnet_del
},
{
.cmd = SMC_PNETID_FLUSH,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_ADMIN_PERM,
- .policy = smc_pnet_policy,
.doit = smc_pnet_flush
}
};
@@ -642,6 +642,7 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = {
.name = SMCR_GENL_FAMILY_NAME,
.version = SMCR_GENL_FAMILY_VERSION,
.maxattr = SMC_PNETID_MAX,
+ .policy = smc_pnet_policy,
.netnsok = true,
.module = THIS_MODULE,
.ops = smc_pnet_ops,
@@ -715,7 +716,7 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
int i, nest_lvl;
rtnl_lock();
- nest_lvl = dev_get_nest_level(ndev);
+ nest_lvl = ndev->lower_level;
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
@@ -758,8 +759,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
* IB device and port
*/
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
- struct smc_ib_device **smcibdev,
- u8 *ibport, unsigned short vlan_id, u8 gid[])
+ struct smc_init_info *ini)
{
struct smc_ib_device *ibdev;
@@ -779,10 +779,11 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
- !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
- NULL)) {
- *smcibdev = ibdev;
- *ibport = i;
+ !test_bit(i - 1, ibdev->ports_going_away) &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
break;
}
}
@@ -797,9 +798,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
* If nothing found, try to use handshake device
*/
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
- struct smc_ib_device **smcibdev,
- u8 *ibport, unsigned short vlan_id,
- u8 gid[])
+ struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smc_ib_device *ibdev;
@@ -809,7 +808,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid) &&
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
- smc_pnet_find_rdma_dev(ndev, smcibdev, ibport, vlan_id, gid);
+ smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
@@ -820,10 +819,11 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
- !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
- NULL)) {
- *smcibdev = ibdev;
- *ibport = i;
+ !test_bit(i - 1, ibdev->ports_going_away) &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
goto out;
}
}
@@ -833,7 +833,7 @@ out:
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
- struct smcd_dev **smcismdev)
+ struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smcd_dev *ismdev;
@@ -846,8 +846,9 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
- if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
- *smcismdev = ismdev;
+ if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
+ !ismdev->going_away) {
+ ini->ism_dev = ismdev;
break;
}
}
@@ -858,21 +859,18 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
* determine ib_device and port belonging to used internal TCP socket
* ethernet interface.
*/
-void smc_pnet_find_roce_resource(struct sock *sk,
- struct smc_ib_device **smcibdev, u8 *ibport,
- unsigned short vlan_id, u8 gid[])
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- *smcibdev = NULL;
- *ibport = 0;
-
+ ini->ib_dev = NULL;
+ ini->ib_port = 0;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
- smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
+ smc_pnet_find_roce_by_pnetid(dst->dev, ini);
out_rel:
dst_release(dst);
@@ -880,17 +878,17 @@ out:
return;
}
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- *smcismdev = NULL;
+ ini->ism_dev = NULL;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
- smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
+ smc_pnet_find_ism_by_pnetid(dst->dev, ini);
out_rel:
dst_release(dst);
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 5eac42fb45d0..4564e4d69c2e 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -18,6 +18,7 @@
struct smc_ib_device;
struct smcd_dev;
+struct smc_init_info;
/**
* struct smc_pnettable - SMC PNET table anchor
@@ -43,9 +44,7 @@ int smc_pnet_init(void) __init;
int smc_pnet_net_init(struct net *net);
void smc_pnet_exit(void);
void smc_pnet_net_exit(struct net *net);
-void smc_pnet_find_roce_resource(struct sock *sk,
- struct smc_ib_device **smcibdev, u8 *ibport,
- unsigned short vlan_id, u8 gid[]);
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
#endif
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 413a6abf227e..39d7b34d06d2 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -201,6 +201,8 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
+ struct smc_cdc_conn_state_flags *cflags =
+ &conn->local_tx_ctrl.conn_state_flags;
struct sock *sk = &smc->sk;
int rc;
@@ -210,9 +212,10 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
add_wait_queue(sk_sleep(sk), &wait);
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
+ cflags->peer_conn_abort ||
sk->sk_shutdown & RCV_SHUTDOWN ||
- fcrit(conn) ||
- smc_cdc_rxed_any_close_or_senddone(conn),
+ conn->killed ||
+ fcrit(conn),
&wait);
remove_wait_queue(sk_sleep(sk), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
@@ -262,6 +265,18 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
return -EAGAIN;
}
+static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+
+ if (smc_rx_data_available(conn))
+ return true;
+ else if (conn->urg_state == SMC_URG_VALID)
+ /* we received a single urgent Byte - skip */
+ smc_rx_update_cons(smc, 0);
+ return false;
+}
+
/* smc_rx_recvmsg - receive data from RMBE
* @msg: copy data to receive buffer
* @pipe: copy data to pipe if set - indicates splice() call
@@ -303,16 +318,20 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
if (read_done >= target || (pipe && read_done))
break;
- if (atomic_read(&conn->bytes_to_rcv))
+ if (conn->killed)
+ break;
+
+ if (smc_rx_recvmsg_data_available(smc))
goto copy;
- else if (conn->urg_state == SMC_URG_VALID)
- /* we received a single urgent Byte - skip */
- smc_rx_update_cons(smc, 0);
- if (sk->sk_shutdown & RCV_SHUTDOWN ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
+ /* smc_cdc_msg_recv_action() could have run after
+ * above smc_rx_recvmsg_data_available()
+ */
+ if (smc_rx_recvmsg_data_available(smc))
+ goto copy;
break;
+ }
if (read_done) {
if (sk->sk_err ||
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index f0de323d15d6..0d42e7716b91 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -76,18 +76,17 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
struct sock *sk = &smc->sk;
- bool noblock;
long timeo;
int rc = 0;
/* similar to sk_stream_wait_memory */
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- noblock = timeo ? false : true;
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
+ conn->killed ||
conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
rc = -EPIPE;
break;
@@ -97,8 +96,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
break;
}
if (!timeo) {
- if (noblock)
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ /* ensure EPOLLOUT is subsequently generated */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
rc = -EAGAIN;
break;
}
@@ -157,7 +156,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
return -ENOTCONN;
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
(smc->sk.sk_err == ECONNABORTED) ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+ conn->killed)
return -EPIPE;
if (smc_cdc_rxed_any_close(conn))
return send_done ?: -ECONNRESET;
@@ -284,10 +283,8 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
- if (rc) {
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
- smc_lgr_terminate(lgr);
- }
+ if (rc)
+ smc_lgr_terminate(lgr, true);
return rc;
}
@@ -497,10 +494,11 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
if (smc->sk.sk_err == ECONNABORTED)
return sock_error(&smc->sk);
+ if (conn->killed)
+ return -EPIPE;
rc = 0;
- if (conn->alert_token_local) /* connection healthy */
- mod_delayed_work(system_wq, &conn->tx_work,
- SMC_TX_WORK_DELAY);
+ mod_delayed_work(system_wq, &conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
return rc;
}
@@ -549,6 +547,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
int rc;
+ if (conn->killed ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ return -EPIPE; /* connection being aborted */
if (conn->lgr->is_smcd)
rc = smcd_tx_sndbuf_nonempty(conn);
else
@@ -575,9 +576,7 @@ void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
- if (smc->sk.sk_err ||
- !conn->alert_token_local ||
- conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ if (smc->sk.sk_err)
goto out;
rc = smc_tx_sndbuf_nonempty(conn);
@@ -610,8 +609,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
((to_confirm > conn->rmbe_update_limit) &&
((sender_free <= (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
+ if (conn->killed ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ return;
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
- conn->alert_token_local) { /* connection healthy */
+ !conn->killed) {
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 253aa75dc2b6..337ee52ad3d3 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -50,6 +50,26 @@ struct smc_wr_tx_pend { /* control data for a pending send request */
/*------------------------------- completion --------------------------------*/
+/* returns true if at least one tx work request is pending on the given link */
+static inline bool smc_wr_is_tx_pend(struct smc_link *link)
+{
+ if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) !=
+ link->wr_tx_cnt) {
+ return true;
+ }
+ return false;
+}
+
+/* wait till all pending tx work requests on the given link are completed */
+static inline int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
+{
+ if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
+ SMC_WR_TX_WAIT_PENDING_TIME))
+ return 0;
+ else /* timeout */
+ return -EPIPE;
+}
+
static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
u32 i;
@@ -75,7 +95,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
link->wr_reg_state = FAILED;
else
link->wr_reg_state = CONFIRMED;
- wake_up(&link->wr_reg_wait);
+ smc_wr_wakeup_reg_wait(link);
return;
}
@@ -101,7 +121,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
clear_bit(i, link->wr_tx_mask);
}
/* terminate connections of this link group abnormally */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -171,6 +191,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
+ struct smc_link_group *lgr = smc_get_lgr(link);
struct smc_wr_tx_pend *wr_pend;
u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
@@ -179,19 +200,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
*wr_buf = NULL;
*wr_pend_priv = NULL;
- if (in_softirq()) {
+ if (in_softirq() || lgr->terminating) {
rc = smc_wr_tx_get_free_slot_index(link, &idx);
if (rc)
return rc;
} else {
- rc = wait_event_timeout(
+ rc = wait_event_interruptible_timeout(
link->wr_tx_wait,
link->state == SMC_LNK_INACTIVE ||
+ lgr->terminating ||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(lgr);
return -EPIPE;
}
if (idx == link->wr_tx_cnt)
@@ -227,6 +249,7 @@ int smc_wr_tx_put_slot(struct smc_link *link,
memset(&link->wr_tx_bufs[idx], 0,
sizeof(link->wr_tx_bufs[idx]));
test_and_clear_bit(idx, link->wr_tx_mask);
+ wake_up(&link->wr_tx_wait);
return 1;
}
@@ -247,7 +270,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
if (rc) {
smc_wr_tx_put_slot(link, priv);
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
}
return rc;
}
@@ -272,7 +295,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
SMC_WR_REG_MR_WAIT_TIME);
if (!rc) {
/* timeout - terminate connections */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
return -EPIPE;
}
if (rc == -ERESTARTSYS)
@@ -373,7 +396,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
/* terminate connections of this link group
* abnormally
*/
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
@@ -510,8 +533,10 @@ void smc_wr_free_link(struct smc_link *lnk)
{
struct ib_device *ibdev;
- memset(lnk->wr_tx_mask, 0,
- BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
+ if (smc_wr_tx_wait_no_pending_sends(lnk))
+ memset(lnk->wr_tx_mask, 0,
+ BITS_TO_LONGS(SMC_WR_BUF_CNT) *
+ sizeof(*lnk->wr_tx_mask));
if (!lnk->smcibdev)
return;
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 09bf32fd3959..3ac99c898418 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -60,6 +60,16 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
atomic_long_set(wr_tx_id, val);
}
+static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk)
+{
+ wake_up_all(&lnk->wr_tx_wait);
+}
+
+static inline void smc_wr_wakeup_reg_wait(struct smc_link *lnk)
+{
+ wake_up(&lnk->wr_reg_wait);
+}
+
/* post a new receive work request to fill a completed old work request entry */
static inline int smc_wr_rx_post(struct smc_link *link)
{