summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/ctrl.c42
-rw-r--r--net/mptcp/options.c26
-rw-r--r--net/mptcp/pm_netlink.c49
-rw-r--r--net/mptcp/pm_userspace.c295
-rw-r--r--net/mptcp/protocol.c35
-rw-r--r--net/mptcp/protocol.h46
6 files changed, 235 insertions, 258 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 38d8121331d4..2dd81e6c26bd 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
unsigned int close_timeout;
unsigned int stale_loss_cnt;
atomic_t active_disable_times;
+ u8 syn_retrans_before_tcp_fallback;
unsigned long active_disable_stamp;
u8 mptcp_enabled;
u8 checksum_enabled;
@@ -92,6 +93,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
pernet->blackhole_timeout = 3600;
+ pernet->syn_retrans_before_tcp_fallback = 2;
atomic_set(&pernet->active_disable_times, 0);
pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
@@ -102,16 +104,15 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
}
#ifdef CONFIG_SYSCTL
-static int mptcp_set_scheduler(const struct net *net, const char *name)
+static int mptcp_set_scheduler(char *scheduler, const char *name)
{
- struct mptcp_pernet *pernet = mptcp_get_pernet(net);
struct mptcp_sched_ops *sched;
int ret = 0;
rcu_read_lock();
sched = mptcp_sched_find(name);
if (sched)
- strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX);
+ strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX);
else
ret = -ENOENT;
rcu_read_unlock();
@@ -122,7 +123,7 @@ static int mptcp_set_scheduler(const struct net *net, const char *name)
static int proc_scheduler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- const struct net *net = current->nsproxy->net_ns;
+ char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data;
char val[MPTCP_SCHED_NAME_MAX];
struct ctl_table tbl = {
.data = val,
@@ -130,11 +131,11 @@ static int proc_scheduler(const struct ctl_table *ctl, int write,
};
int ret;
- strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX);
+ strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
- ret = mptcp_set_scheduler(net, val);
+ ret = mptcp_set_scheduler(*scheduler, val);
return ret;
}
@@ -161,7 +162,9 @@ static int proc_blackhole_detect_timeout(const struct ctl_table *table,
int write, void *buffer, size_t *lenp,
loff_t *ppos)
{
- struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+ struct mptcp_pernet *pernet = container_of(table->data,
+ struct mptcp_pernet,
+ blackhole_timeout);
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
@@ -228,7 +231,7 @@ static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "available_schedulers",
.maxlen = MPTCP_SCHED_BUF_MAX,
- .mode = 0644,
+ .mode = 0444,
.proc_handler = proc_available_schedulers,
},
{
@@ -244,6 +247,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.proc_handler = proc_blackhole_detect_timeout,
.extra1 = SYSCTL_ZERO,
},
+ {
+ .procname = "syn_retrans_before_tcp_fallback",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -268,6 +277,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
/* table[7] is for available_schedulers which is read-only info */
table[8].data = &pernet->close_timeout;
table[9].data = &pernet->blackhole_timeout;
+ table[10].data = &pernet->syn_retrans_before_tcp_fallback;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -391,22 +401,26 @@ void mptcp_active_enable(struct sock *sk)
void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
{
struct mptcp_subflow_context *subflow;
- u32 timeouts;
if (!sk_is_mptcp(ssk))
return;
- timeouts = inet_csk(ssk)->icsk_retransmits;
subflow = mptcp_subflow_ctx(ssk);
if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
- if (timeouts == 2 || (timeouts < 2 && expired)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+ struct net *net = sock_net(ssk);
+ u8 timeouts, to_max;
+
+ timeouts = inet_csk(ssk)->icsk_retransmits;
+ to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback;
+
+ if (timeouts == to_max || (timeouts < to_max && expired)) {
+ MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP);
subflow->mpc_drop = 1;
mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
- } else {
- subflow->mpc_drop = 0;
}
+ } else if (ssk->sk_state == TCP_SYN_SENT) {
+ subflow->mpc_drop = 0;
}
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 1603b3702e22..fd2de185bc93 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
- mp_opt->use_ack = 0;
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
@@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("DSS\n");
ptr++;
- /* we must clear 'mpc_map' be able to detect MP_CAPABLE
- * map vs DSS map in mptcp_incoming_options(), and reconstruct
- * map info accordingly
- */
- mp_opt->mpc_map = 0;
flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
@@ -369,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb,
const unsigned char *ptr;
int length;
- /* initialize option status */
- mp_opt->suboptions = 0;
+ /* Ensure that casting the whole status to u32 is efficient and safe */
+ BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status),
+ sizeof(u32)));
+ *(u32 *)&mp_opt->status = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -607,7 +604,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
}
opts->ext_copy.use_ack = 1;
opts->suboptions = OPTION_MPTCP_DSS;
- WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
if (dss_size == 0)
@@ -667,8 +663,15 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
&echo, &drop_other_suboptions))
return false;
+ /*
+ * Later on, mptcp_write_options() will enforce mutually exclusion with
+ * DSS, bail out if such option is set and we can't drop it.
+ */
if (drop_other_suboptions)
remaining += opt_size;
+ else if (opts->suboptions & OPTION_MPTCP_DSS)
+ return false;
+
len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port);
if (remaining < len)
return false;
@@ -1281,7 +1284,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
}
- return;
+ goto update_wspace;
}
if (rcv_wnd_new != rcv_wnd_old) {
@@ -1306,6 +1309,9 @@ raise_win:
th->window = htons(new_win);
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
}
+
+update_wspace:
+ WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
}
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 7a0f7998376a..572d160edca3 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -107,8 +107,8 @@ static void remote_address(const struct sock_common *skc,
#endif
}
-static bool lookup_subflow_by_saddr(const struct list_head *list,
- const struct mptcp_addr_info *saddr)
+bool mptcp_lookup_subflow_by_saddr(const struct list_head *list,
+ const struct mptcp_addr_info *saddr)
{
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info cur;
@@ -1447,8 +1447,8 @@ out_free:
return ret;
}
-static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
- const struct mptcp_addr_info *addr)
+bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *entry;
@@ -1476,7 +1476,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr);
- ret = remove_anno_list_by_saddr(msk, addr);
+ ret = mptcp_remove_anno_list_by_saddr(msk, addr);
if (ret || force) {
spin_lock_bh(&msk->pm.lock);
if (ret) {
@@ -1520,7 +1520,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
}
lock_sock(sk);
- remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
+ remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr);
mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
!(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
@@ -1633,36 +1633,6 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
return ret;
}
-/* Called from the userspace PM only */
-void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
-{
- struct mptcp_rm_list alist = { .nr = 0 };
- struct mptcp_pm_addr_entry *entry;
- int anno_nr = 0;
-
- list_for_each_entry(entry, rm_list, list) {
- if (alist.nr >= MPTCP_RM_IDS_MAX)
- break;
-
- /* only delete if either announced or matching a subflow */
- if (remove_anno_list_by_saddr(msk, &entry->addr))
- anno_nr++;
- else if (!lookup_subflow_by_saddr(&msk->conn_list,
- &entry->addr))
- continue;
-
- alist.ids[alist.nr++] = entry->addr.id;
- }
-
- if (alist.nr) {
- spin_lock_bh(&msk->pm.lock);
- msk->pm.add_addr_signaled -= anno_nr;
- mptcp_pm_remove_addr(msk, &alist);
- spin_unlock_bh(&msk->pm.lock);
- }
-}
-
-/* Called from the in-kernel PM only */
static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list)
{
@@ -1671,11 +1641,11 @@ static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
list_for_each_entry(entry, rm_list, list) {
if (slist.nr < MPTCP_RM_IDS_MAX &&
- lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
+ mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);
if (alist.nr < MPTCP_RM_IDS_MAX &&
- remove_anno_list_by_saddr(msk, &entry->addr))
+ mptcp_remove_anno_list_by_saddr(msk, &entry->addr))
alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);
}
@@ -2050,7 +2020,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
- (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
+ MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
spin_unlock_bh(&pernet->lock);
GENL_SET_ERR_MSG(info, "invalid addr flags");
return -EINVAL;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index e35178f5205f..a3d477059b11 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -8,6 +8,10 @@
#include "mib.h"
#include "mptcp_pm_gen.h"
+#define mptcp_for_each_userspace_pm_addr(__msk, __entry) \
+ list_for_each_entry(__entry, \
+ &((__msk)->pm.userspace_pm_local_addr_list), list)
+
void mptcp_free_local_addr_list(struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *entry, *tmp;
@@ -26,6 +30,19 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
}
+static struct mptcp_pm_addr_entry *
+mptcp_userspace_pm_lookup_addr(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ mptcp_for_each_userspace_pm_addr(msk, entry) {
+ if (mptcp_addresses_equal(&entry->addr, addr, false))
+ return entry;
+ }
+ return NULL;
+}
+
static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
struct mptcp_pm_addr_entry *entry,
bool needs_id)
@@ -41,7 +58,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_lock_bh(&msk->pm.lock);
- list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ mptcp_for_each_userspace_pm_addr(msk, e) {
addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
if (addr_match && entry->addr.id == 0 && needs_id)
entry->addr.id = e->addr.id;
@@ -90,22 +107,20 @@ append_err:
static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
struct mptcp_pm_addr_entry *addr)
{
- struct mptcp_pm_addr_entry *entry, *tmp;
struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *entry;
- list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) {
- /* TODO: a refcount is needed because the entry can
- * be used multiple times (e.g. fullmesh mode).
- */
- list_del_rcu(&entry->list);
- sock_kfree_s(sk, entry, sizeof(*entry));
- msk->pm.local_addr_used--;
- return 0;
- }
- }
-
- return -EINVAL;
+ entry = mptcp_userspace_pm_lookup_addr(msk, &addr->addr);
+ if (!entry)
+ return -EINVAL;
+
+ /* TODO: a refcount is needed because the entry can
+ * be used multiple times (e.g. fullmesh mode).
+ */
+ list_del_rcu(&entry->list);
+ sock_kfree_s(sk, entry, sizeof(*entry));
+ msk->pm.local_addr_used--;
+ return 0;
}
static struct mptcp_pm_addr_entry *
@@ -113,7 +128,7 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
{
struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ mptcp_for_each_userspace_pm_addr(msk, entry) {
if (entry->addr.id == id)
return entry;
}
@@ -123,17 +138,12 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
- struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
+ struct mptcp_pm_addr_entry *entry = NULL, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;
spin_lock_bh(&msk->pm.lock);
- list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
- if (mptcp_addresses_equal(&e->addr, skc, false)) {
- entry = e;
- break;
- }
- }
+ entry = mptcp_userspace_pm_lookup_addr(msk, skc);
spin_unlock_bh(&msk->pm.lock);
if (entry)
return entry->addr.id;
@@ -153,50 +163,60 @@ bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
struct mptcp_pm_addr_entry *entry;
- bool backup = false;
+ bool backup;
spin_lock_bh(&msk->pm.lock);
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, skc, false)) {
- backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
- break;
- }
- }
+ entry = mptcp_userspace_pm_lookup_addr(msk, skc);
+ backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
spin_unlock_bh(&msk->pm.lock);
return backup;
}
-int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
+static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct mptcp_sock *msk;
+
+ if (!token) {
+ GENL_SET_ERR_MSG(info, "missing required token");
+ return NULL;
+ }
+
+ msk = mptcp_token_get_sock(genl_info_net(info), nla_get_u32(token));
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return NULL;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ sock_put((struct sock *)msk);
+ return NULL;
+ }
+
+ return msk;
+}
+
+int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
+{
struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry addr_val;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
- u32 token_val;
- if (!addr || !token) {
- GENL_SET_ERR_MSG(info, "missing required inputs");
+ if (!addr) {
+ GENL_SET_ERR_MSG(info, "missing required address");
return err;
}
- token_val = nla_get_u32(token);
-
- msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
- if (!msk) {
- NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ msk = mptcp_userspace_pm_get_sock(info);
+ if (!msk)
return err;
- }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto announce_err;
- }
-
err = mptcp_pm_parse_entry(addr, info, true, &addr_val);
if (err < 0) {
GENL_SET_ERR_MSG(info, "error parsing local address");
@@ -267,40 +287,48 @@ remove_err:
return err;
}
+void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry)
+{
+ struct mptcp_rm_list alist = { .nr = 0 };
+ int anno_nr = 0;
+
+ /* only delete if either announced or matching a subflow */
+ if (mptcp_remove_anno_list_by_saddr(msk, &entry->addr))
+ anno_nr++;
+ else if (!mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
+ return;
+
+ alist.ids[alist.nr++] = entry->addr.id;
+
+ spin_lock_bh(&msk->pm.lock);
+ msk->pm.add_addr_signaled -= anno_nr;
+ mptcp_pm_remove_addr(msk, &alist);
+ spin_unlock_bh(&msk->pm.lock);
+}
+
int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
struct mptcp_pm_addr_entry *match;
- struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
- LIST_HEAD(free_list);
int err = -EINVAL;
struct sock *sk;
- u32 token_val;
u8 id_val;
- if (!id || !token) {
- GENL_SET_ERR_MSG(info, "missing required inputs");
+ if (!id) {
+ GENL_SET_ERR_MSG(info, "missing required ID");
return err;
}
id_val = nla_get_u8(id);
- token_val = nla_get_u32(token);
- msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
- if (!msk) {
- NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ msk = mptcp_userspace_pm_get_sock(info);
+ if (!msk)
return err;
- }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto out;
- }
-
if (id_val == 0) {
err = mptcp_userspace_pm_remove_id_zero_address(msk, info);
goto out;
@@ -317,16 +345,14 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- list_move(&match->list, &free_list);
+ list_del_rcu(&match->list);
spin_unlock_bh(&msk->pm.lock);
- mptcp_pm_remove_addrs(msk, &free_list);
+ mptcp_pm_remove_addr_entry(msk, match);
release_sock(sk);
- list_for_each_entry_safe(match, entry, &free_list, list) {
- sock_kfree_s(sk, match, sizeof(*match));
- }
+ sock_kfree_s(sk, match, sizeof(*match));
err = 0;
out:
@@ -337,7 +363,6 @@ out:
int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry entry = { 0 };
struct mptcp_addr_info addr_r;
@@ -345,28 +370,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
- u32 token_val;
- if (!laddr || !raddr || !token) {
- GENL_SET_ERR_MSG(info, "missing required inputs");
+ if (!laddr || !raddr) {
+ GENL_SET_ERR_MSG(info, "missing required address(es)");
return err;
}
- token_val = nla_get_u32(token);
-
- msk = mptcp_token_get_sock(genl_info_net(info), token_val);
- if (!msk) {
- NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ msk = mptcp_userspace_pm_get_sock(info);
+ if (!msk)
return err;
- }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto create_err;
- }
-
err = mptcp_pm_parse_entry(laddr, info, true, &entry);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
@@ -469,36 +484,25 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct mptcp_addr_info addr_l;
+ struct mptcp_pm_addr_entry addr_l;
struct mptcp_addr_info addr_r;
struct mptcp_sock *msk;
struct sock *sk, *ssk;
int err = -EINVAL;
- u32 token_val;
- if (!laddr || !raddr || !token) {
- GENL_SET_ERR_MSG(info, "missing required inputs");
+ if (!laddr || !raddr) {
+ GENL_SET_ERR_MSG(info, "missing required address(es)");
return err;
}
- token_val = nla_get_u32(token);
-
- msk = mptcp_token_get_sock(genl_info_net(info), token_val);
- if (!msk) {
- NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ msk = mptcp_userspace_pm_get_sock(info);
+ if (!msk)
return err;
- }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto destroy_err;
- }
-
- err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ err = mptcp_pm_parse_entry(laddr, info, true, &addr_l);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
goto destroy_err;
@@ -511,43 +515,41 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
- ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
- addr_l.family = AF_INET6;
+ if (addr_l.addr.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+ ipv6_addr_set_v4mapped(addr_l.addr.addr.s_addr, &addr_l.addr.addr6);
+ addr_l.addr.family = AF_INET6;
}
- if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr.addr6)) {
ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
addr_r.family = AF_INET6;
}
#endif
- if (addr_l.family != addr_r.family) {
+ if (addr_l.addr.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL;
goto destroy_err;
}
- if (!addr_l.port || !addr_r.port) {
+ if (!addr_l.addr.port || !addr_r.port) {
GENL_SET_ERR_MSG(info, "missing local or remote port");
err = -EINVAL;
goto destroy_err;
}
lock_sock(sk);
- ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
- if (ssk) {
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- struct mptcp_pm_addr_entry entry = { .addr = addr_l };
-
- spin_lock_bh(&msk->pm.lock);
- mptcp_userspace_pm_delete_local_addr(msk, &entry);
- spin_unlock_bh(&msk->pm.lock);
- mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
- mptcp_close_ssk(sk, ssk, subflow);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
- err = 0;
- } else {
+ ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r);
+ if (!ssk) {
err = -ESRCH;
+ goto release_sock;
}
+
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_userspace_pm_delete_local_addr(msk, &addr_l);
+ spin_unlock_bh(&msk->pm.lock);
+ mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+ mptcp_close_ssk(sk, ssk, mptcp_subflow_ctx(ssk));
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
+release_sock:
release_sock(sk);
destroy_err:
@@ -560,31 +562,19 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, };
struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, };
struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct net *net = sock_net(skb->sk);
struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
int ret = -EINVAL;
struct sock *sk;
- u32 token_val;
u8 bkup = 0;
- token_val = nla_get_u32(token);
-
- msk = mptcp_token_get_sock(net, token_val);
- if (!msk) {
- NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ msk = mptcp_userspace_pm_get_sock(info);
+ if (!msk)
return ret;
- }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "userspace PM not selected");
- goto set_flags_err;
- }
-
ret = mptcp_pm_parse_entry(attr, info, false, &loc);
if (ret < 0)
goto set_flags_err;
@@ -606,13 +596,12 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
bkup = 1;
spin_lock_bh(&msk->pm.lock);
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, &loc.addr, false)) {
- if (bkup)
- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
- else
- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
- }
+ entry = mptcp_userspace_pm_lookup_addr(msk, &loc.addr);
+ if (entry) {
+ if (bkup)
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
}
spin_unlock_bh(&msk->pm.lock);
@@ -632,33 +621,23 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1);
} *bitmap;
const struct genl_info *info = genl_info_dump(cb);
- struct net *net = sock_net(msg->sk);
struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
- struct nlattr *token;
int ret = -EINVAL;
struct sock *sk;
void *hdr;
bitmap = (struct id_bitmap *)cb->ctx;
- token = info->attrs[MPTCP_PM_ATTR_TOKEN];
- msk = mptcp_token_get_sock(net, nla_get_u32(token));
- if (!msk) {
- NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ msk = mptcp_userspace_pm_get_sock(info);
+ if (!msk)
return ret;
- }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto out;
- }
-
lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ mptcp_for_each_userspace_pm_addr(msk, entry) {
if (test_bit(entry->addr.id, bitmap->map))
continue;
@@ -680,7 +659,6 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
release_sock(sk);
ret = msg->len;
-out:
sock_put(sk);
return ret;
}
@@ -689,28 +667,19 @@ int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct mptcp_pm_addr_entry addr, *entry;
- struct net *net = sock_net(skb->sk);
struct mptcp_sock *msk;
struct sk_buff *msg;
int ret = -EINVAL;
struct sock *sk;
void *reply;
- msk = mptcp_token_get_sock(net, nla_get_u32(token));
- if (!msk) {
- NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ msk = mptcp_userspace_pm_get_sock(info);
+ if (!msk)
return ret;
- }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto out;
- }
-
ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
goto out;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 08a72242428c..6bd819047470 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -47,7 +47,7 @@ static void __mptcp_destroy_sock(struct sock *sk);
static void mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
-static struct net_device mptcp_napi_dev;
+static struct net_device *mptcp_napi_dev;
/* Returns end sequence number of the receiver's advertised window */
static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
@@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
int delta;
if (MPTCP_SKB_CB(from)->offset ||
+ ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
!skb_try_coalesce(to, from, &fragstolen, &delta))
return false;
@@ -528,13 +529,13 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow));
}
-static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk, int copied)
{
bool slow;
slow = lock_sock_fast(ssk);
if (tcp_can_send_ack(ssk))
- tcp_cleanup_rbuf(ssk, 1);
+ tcp_cleanup_rbuf(ssk, copied);
unlock_sock_fast(ssk, slow);
}
@@ -551,7 +552,7 @@ static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
(ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
-static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
+static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied)
{
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
@@ -559,14 +560,14 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
int space = __mptcp_space(sk);
bool cleanup, rx_empty;
- cleanup = (space > 0) && (space >= (old_space << 1));
- rx_empty = !__mptcp_rmem(sk);
+ cleanup = (space > 0) && (space >= (old_space << 1)) && copied;
+ rx_empty = !__mptcp_rmem(sk) && copied;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
- mptcp_subflow_cleanup_rbuf(ssk);
+ mptcp_subflow_cleanup_rbuf(ssk, copied);
}
}
@@ -1766,8 +1767,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* see mptcp_disconnect().
* Attempt it again outside the problematic scope.
*/
- if (!mptcp_disconnect(sk, 0))
+ if (!mptcp_disconnect(sk, 0)) {
+ sk->sk_disconnects++;
sk->sk_socket->state = SS_UNCONNECTED;
+ }
}
inet_clear_bit(DEFER_CONNECT, sk);
@@ -1939,6 +1942,8 @@ do_error:
goto out;
}
+static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
+
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
size_t len, int flags,
@@ -1992,6 +1997,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
break;
}
+ mptcp_rcv_space_adjust(msk, copied);
return copied;
}
@@ -2217,9 +2223,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
copied += bytes_read;
- /* be sure to advertise window change */
- mptcp_cleanup_rbuf(msk);
-
if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
continue;
@@ -2268,7 +2271,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld\n", timeo);
- mptcp_rcv_space_adjust(msk, copied);
+ mptcp_cleanup_rbuf(msk, copied);
err = sk_wait_data(sk, &timeo, NULL);
if (err < 0) {
err = copied ? : err;
@@ -2276,7 +2279,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
}
- mptcp_rcv_space_adjust(msk, copied);
+ mptcp_cleanup_rbuf(msk, copied);
out_err:
if (cmsg_flags && copied >= 0) {
@@ -4146,11 +4149,13 @@ void __init mptcp_proto_init(void)
if (percpu_counter_init(&mptcp_sockets_allocated, 0, GFP_KERNEL))
panic("Failed to allocate MPTCP pcpu counter\n");
- init_dummy_netdev(&mptcp_napi_dev);
+ mptcp_napi_dev = alloc_netdev_dummy(0);
+ if (!mptcp_napi_dev)
+ panic("Failed to allocate MPTCP dummy netdev\n");
for_each_possible_cpu(cpu) {
delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu);
INIT_LIST_HEAD(&delegated->head);
- netif_napi_add_tx(&mptcp_napi_dev, &delegated->napi,
+ netif_napi_add_tx(mptcp_napi_dev, &delegated->napi,
mptcp_napi_poll);
napi_enable(&delegated->napi);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a93e661ef5c4..f6a207958459 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -149,22 +149,24 @@ struct mptcp_options_received {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
- u16 suboptions;
+ struct_group(status,
+ u16 suboptions;
+ u16 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ mpc_map:1,
+ reset_reason:4,
+ reset_transient:1,
+ echo:1,
+ backup:1,
+ deny_join_id0:1,
+ __unused:2;
+ );
+ u8 join_id;
u32 token;
u32 nonce;
- u16 use_map:1,
- dsn64:1,
- data_fin:1,
- use_ack:1,
- ack64:1,
- mpc_map:1,
- reset_reason:4,
- reset_transient:1,
- echo:1,
- backup:1,
- deny_join_id0:1,
- __unused:2;
- u8 join_id;
u64 thmac;
u8 hmac[MPTCPOPT_HMAC_LEN];
struct mptcp_addr_info addr;
@@ -760,10 +762,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
static inline bool mptcp_epollin_ready(const struct sock *sk)
{
+ u64 data_avail = mptcp_data_avail(mptcp_sk(sk));
+
+ if (!data_avail)
+ return false;
+
/* mptcp doesn't have to deal with small skbs in the receive queue,
- * at it can always coalesce them
+ * as it can always coalesce them
*/
- return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
+ return (data_avail >= sk->sk_rcvlowat) ||
(mem_cgroup_sockets_enabled && sk->sk_memcg &&
mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
READ_ONCE(tcp_memory_pressure);
@@ -1027,6 +1034,10 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
+bool mptcp_lookup_subflow_by_saddr(const struct list_head *list,
+ const struct mptcp_addr_info *saddr);
+bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr);
int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
@@ -1034,7 +1045,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
-void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
+void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);