summary refs log tree commit diff
path: root/net/smc
diff options
context:
space:
mode:
Diffstat (limited to 'net/smc')
-rw-r--r-- net/smc/af_smc.c 196
-rw-r--r-- net/smc/smc.h 20
-rw-r--r-- net/smc/smc_diag.c 2
-rw-r--r-- net/smc/smc_pnet.c 8
4 files changed, 195 insertions, 31 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 961854e56736..306d9e8cd1dd 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -566,12 +566,118 @@ static void smc_stat_fallback(struct smc_sock *smc)
mutex_unlock(&net->smc.mutex_fback_rsn);
}
-static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+/* must be called under rcu read lock */
+static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
{
- wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
- wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
- unsigned long flags;
+ struct socket_wq *wq;
+ __poll_t flags;
+ wq = rcu_dereference(smc->sk.sk_wq);
+ if (!skwq_has_sleeper(wq))
+ return;
+
+ /* wake up smc sk->sk_wq */
+ if (!key) {
+ /* sk_state_change */
+ wake_up_interruptible_all(&wq->wait);
+ } else {
+ flags = key_to_poll(key);
+ if (flags & (EPOLLIN | EPOLLOUT))
+ /* sk_data_ready or sk_write_space */
+ wake_up_interruptible_sync_poll(&wq->wait, flags);
+ else if (flags & EPOLLERR)
+ /* sk_error_report */
+ wake_up_interruptible_poll(&wq->wait, flags);
+ }
+}
+
+static int smc_fback_mark_woken(wait_queue_entry_t *wait,
+ unsigned int mode, int sync, void *key)
+{
+ struct smc_mark_woken *mark =
+ container_of(wait, struct smc_mark_woken, wait_entry);
+
+ mark->woken = true;
+ mark->key = key;
+ return 0;
+}
+
+static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
+ void (*clcsock_callback)(struct sock *sk))
+{
+ struct smc_mark_woken mark = { .woken = false };
+ struct socket_wq *wq;
+
+ init_waitqueue_func_entry(&mark.wait_entry,
+ smc_fback_mark_woken);
+ rcu_read_lock();
+ wq = rcu_dereference(clcsk->sk_wq);
+ if (!wq)
+ goto out;
+ add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+ clcsock_callback(clcsk);
+ remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+
+ if (mark.woken)
+ smc_fback_wakeup_waitqueue(smc, mark.key);
+out:
+ rcu_read_unlock();
+}
+
+static void smc_fback_state_change(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
+}
+
+static void smc_fback_data_ready(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
+}
+
+static void smc_fback_write_space(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
+}
+
+static void smc_fback_error_report(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
+}
+
+static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+{
+ struct sock *clcsk;
+ int rc = 0;
+
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ rc = -EBADF;
+ goto out;
+ }
+ clcsk = smc->clcsock->sk;
+
+ if (smc->use_fallback)
+ goto out;
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -582,22 +688,41 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
- /* There may be some entries remaining in
- * smc socket->wq, which should be removed
- * to clcsocket->wq during the fallback.
+ /* There might be some wait entries remaining
+ * in smc sk->sk_wq and they should be woken up
+ * as clcsock's wait queue is woken up.
*/
- spin_lock_irqsave(&smc_wait->lock, flags);
- spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
- list_splice_init(&smc_wait->head, &clc_wait->head);
- spin_unlock(&clc_wait->lock);
- spin_unlock_irqrestore(&smc_wait->lock, flags);
+ smc->clcsk_state_change = clcsk->sk_state_change;
+ smc->clcsk_data_ready = clcsk->sk_data_ready;
+ smc->clcsk_write_space = clcsk->sk_write_space;
+ smc->clcsk_error_report = clcsk->sk_error_report;
+
+ clcsk->sk_state_change = smc_fback_state_change;
+ clcsk->sk_data_ready = smc_fback_data_ready;
+ clcsk->sk_write_space = smc_fback_write_space;
+ clcsk->sk_error_report = smc_fback_error_report;
+
+ smc->clcsock->sk->sk_user_data =
+ (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
}
+out:
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc_switch_to_fallback(smc, reason_code);
+ struct net *net = sock_net(&smc->sk);
+ int rc = 0;
+
+ rc = smc_switch_to_fallback(smc, reason_code);
+ if (rc) { /* fallback fails */
+ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
+ return rc;
+ }
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
@@ -1518,11 +1643,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
smc_conn_abort(new_smc, local_first);
- if (reason_code < 0) { /* error, no fallback possible */
+ if (reason_code < 0 ||
+ smc_switch_to_fallback(new_smc, reason_code)) {
+ /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_switch_to_fallback(new_smc, reason_code);
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
@@ -1964,8 +2090,11 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
- smc_listen_out_connected(new_smc);
+ rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
+ if (rc)
+ smc_listen_out_err(new_smc);
+ else
+ smc_listen_out_connected(new_smc);
return;
}
@@ -2094,10 +2223,9 @@ out:
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
- struct smc_sock *lsmc;
+ struct smc_sock *lsmc =
+ smc_clcsock_user_data(listen_clcsock);
- lsmc = (struct smc_sock *)
- ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
if (!lsmc)
return;
lsmc->clcsk_data_ready(listen_clcsock);
@@ -2254,7 +2382,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ if (rc)
+ goto out;
} else {
rc = -EINVAL;
goto out;
@@ -2447,6 +2577,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
/* generic setsockopts reaching us here always apply to the
* CLC socket
*/
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
if (unlikely(!smc->clcsock->ops->setsockopt))
rc = -EOPNOTSUPP;
else
@@ -2456,6 +2591,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_err = smc->clcsock->sk->sk_err;
sk_error_report(sk);
}
+ mutex_unlock(&smc->clcsock_release_lock);
if (optlen < sizeof(int))
return -EINVAL;
@@ -2472,7 +2608,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
} else {
rc = -EINVAL;
}
@@ -2515,13 +2651,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct smc_sock *smc;
+ int rc;
smc = smc_sk(sock->sk);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
/* socket options apply to the CLC socket */
- if (unlikely(!smc->clcsock->ops->getsockopt))
+ if (unlikely(!smc->clcsock->ops->getsockopt)) {
+ mutex_unlock(&smc->clcsock_release_lock);
return -EOPNOTSUPP;
- return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
- optval, optlen);
+ }
+ rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
+ optval, optlen);
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
static int smc_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 3d0b8e300deb..37b2001a0255 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -139,6 +139,12 @@ enum smc_urg_state {
SMC_URG_READ = 3, /* data was already read */
};
+struct smc_mark_woken {
+ bool woken;
+ void *key;
+ wait_queue_entry_t wait_entry;
+};
+
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
@@ -228,8 +234,14 @@ struct smc_connection {
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
+ void (*clcsk_state_change)(struct sock *sk);
+ /* original state_change fct. */
void (*clcsk_data_ready)(struct sock *sk);
- /* original data_ready fct. **/
+ /* original data_ready fct. */
+ void (*clcsk_write_space)(struct sock *sk);
+ /* original write_space fct. */
+ void (*clcsk_error_report)(struct sock *sk);
+ /* original error_report fct. */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct connect_work; /* handle non-blocking connect*/
@@ -264,6 +276,12 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
+static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk)
+{
+ return (struct smc_sock *)
+ ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
+}
+
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index b8898c787d23..1fca2f90a9c7 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -146,13 +146,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_link *link = smc->conn.lnk;
- struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net);
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = link->ibport,
.lnk[0].link_id = link->link_id,
- .lnk[0].net_cookie = net->net_cookie,
};
memcpy(linfo.lnk[0].ibname,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 291f1484a1b7..0599246c0376 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -368,9 +368,6 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
new_pe->type = SMC_PNET_ETH;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
- new_pe->ndev = ndev;
- if (ndev)
- netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
rc = -EEXIST;
new_netdev = true;
write_lock(&pnettable->lock);
@@ -382,6 +379,11 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
}
}
if (new_netdev) {
+ if (ndev) {
+ new_pe->ndev = ndev;
+ netdev_tracker_alloc(ndev, &new_pe->dev_tracker,
+ GFP_ATOMIC);
+ }
list_add_tail(&new_pe->list, &pnettable->pnetlist);
write_unlock(&pnettable->lock);
} else {