diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/pm_netlink.c | 131 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 60 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 3 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 2 |
4 files changed, 90 insertions, 106 deletions
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7c7395b58944..291b5da42fdb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -413,7 +413,7 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int i; for (i = 0; i < nr; i++) { - if (mptcp_addresses_equal(&addrs[i], addr, addr->port)) + if (addrs[i].id == addr->id) return true; } @@ -449,7 +449,8 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false)) + addrs[i].id = subflow->remote_id; + if (deny_id0 && !addrs[i].id) continue; if (!lookup_address_in_vec(addrs, i, &addrs[i]) && @@ -463,6 +464,37 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm return i; } +static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + + pr_debug("send ack for %s", + prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); + + slow = lock_sock_fast(ssk); + if (prio) { + if (subflow->backup != backup) + msk->last_snd = NULL; + + subflow->send_mp_prio = 1; + subflow->backup = backup; + subflow->request_bkup = backup; + } + + __mptcp_subflow_send_ack(ssk); + unlock_sock_fast(ssk, slow); +} + +static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + spin_unlock_bh(&msk->pm.lock); + __mptcp_pm_send_ack(msk, subflow, prio, backup); + spin_lock_bh(&msk->pm.lock); +} + static struct mptcp_pm_addr_entry * __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) { @@ -482,30 +514,14 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) || + if ((!lookup_by_id && + mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) || (lookup_by_id && entry->addr.id == info->id)) return entry; } return NULL; } -static int -lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) -{ - const struct mptcp_pm_addr_entry *entry; - int ret = -1; - - rcu_read_lock(); - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) { - ret = entry->addr.id; - break; - } - } - rcu_read_unlock(); - return ret; -} - static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -523,13 +539,23 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* do lazy endpoint usage accounting for the MPC subflows */ if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); + struct mptcp_pm_addr_entry *entry; struct mptcp_addr_info mpc_addr; - int mpc_id; + bool backup = false; local_address((struct sock_common *)msk->first, &mpc_addr); - mpc_id = lookup_id_by_addr(pernet, &mpc_addr); - if (mpc_id >= 0) - __clear_bit(mpc_id, msk->pm.id_avail_bitmap); + rcu_read_lock(); + entry = __lookup_addr(pernet, &mpc_addr, false); + if (entry) { + __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); + msk->mpc_endpoint_id = entry->addr.id; + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + } + rcu_read_unlock(); + + if (backup) + mptcp_pm_send_ack(msk, subflow, true, backup); msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); } @@ -705,16 +731,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) return; subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); - if (subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for %s", - mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"); - - mptcp_subflow_send_ack(ssk); - spin_lock_bh(&msk->pm.lock); - } + if (subflow) + mptcp_pm_send_ack(msk, subflow, false, false); } int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, @@ -729,7 +747,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; - bool slow; local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) @@ -741,23 +758,18 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, continue; } - slow = lock_sock_fast(ssk); - if (subflow->backup != bkup) - msk->last_snd = NULL; - subflow->backup = bkup; - subflow->send_mp_prio = 1; - subflow->request_bkup = bkup; - - pr_debug("send ack for mp_prio"); - __mptcp_subflow_send_ack(ssk); - unlock_sock_fast(ssk, slow); - + __mptcp_pm_send_ack(msk, subflow, true, bkup); return 0; } return -EINVAL; } +static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id) +{ + return local_id == id || (!local_id && msk->mpc_endpoint_id == id); +} + static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list, enum linux_mptcp_mib_field rm_type) @@ -781,6 +793,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, return; for (i = 0; i < rm_list->nr; i++) { + u8 rm_id = rm_list->ids[i]; bool removed = false; list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { @@ -788,15 +801,15 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow->local_id; - if (rm_type == MPTCP_MIB_RMADDR) - id = subflow->remote_id; - - if (rm_list->ids[i] != id) + if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + continue; + if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u", + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_list->ids[i], subflow->local_id, subflow->remote_id); + i, rm_id, subflow->local_id, subflow->remote_id, + msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -808,7 +821,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, __MPTCP_INC_STATS(sock_net(sk), rm_type); } if (rm_type == MPTCP_MIB_RMSUBFLOW) - __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); + __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); if (!removed) continue; @@ -907,10 +920,11 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, /* do not insert duplicate address, differentiate on port only * singled addresses */ + if (!address_use_port(entry)) + entry->addr.port = 0; list_for_each_entry(cur, &pernet->local_addr_list, list) { if (mptcp_addresses_equal(&cur->addr, &entry->addr, - address_use_port(entry) && - address_use_port(cur))) { + cur->addr.port || entry->addr.port)) { /* allow replacing the exiting endpoint only if such * endpoint is an implicit one and the user-space * did not provide an endpoint id @@ -956,7 +970,10 @@ find_next: } pernet->addrs++; - list_add_tail_rcu(&entry->list, &pernet->local_addr_list); + if (!entry->addr.port) + list_add_tail_rcu(&entry->list, &pernet->local_addr_list); + else + list_add_rcu(&entry->list, &pernet->local_addr_list); ret = entry->addr.id; out: @@ -1134,7 +1151,7 @@ void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ss } unlock_sock_fast(ssk, slow); - /* always try to push the pending data regarless of re-injections: + /* always try to push the pending data regardless of re-injections: * we can possibly use backup subflows now, and subflow selection * is cheap under the msk socket lock */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 21a3ed64226e..57f23f4e3a7c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -167,8 +167,8 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, static void __mptcp_rmem_reclaim(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; - mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + amount >>= PAGE_SHIFT; + mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT; __sk_mem_reduce_allocated(sk, amount); } @@ -181,8 +181,8 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size) reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); /* see sk_mem_uncharge() for the rationale behind the following schema */ - if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) - __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK); + if (unlikely(reclaimable >= PAGE_SIZE)) + __mptcp_rmem_reclaim(sk, reclaimable); } static void mptcp_rfree(struct sk_buff *skb) @@ -323,20 +323,16 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) struct mptcp_sock *msk = mptcp_sk(sk); int amt, amount; - if (size < msk->rmem_fwd_alloc) + if (size <= msk->rmem_fwd_alloc) return true; + size -= msk->rmem_fwd_alloc; amt = sk_mem_pages(size); - amount = amt << SK_MEM_QUANTUM_SHIFT; - msk->rmem_fwd_alloc += amount; - if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) { - if (ssk->sk_forward_alloc < amount) { - msk->rmem_fwd_alloc -= amount; - return false; - } + amount = amt << PAGE_SHIFT; + if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) + return false; - ssk->sk_forward_alloc -= amount; - } + msk->rmem_fwd_alloc += amount; return true; } @@ -512,7 +508,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk) tcp_send_ack(ssk); } -void mptcp_subflow_send_ack(struct sock *ssk) +static void mptcp_subflow_send_ack(struct sock *ssk) { bool slow; @@ -971,25 +967,6 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, df->data_seq + df->data_len == msk->write_seq; } -static void __mptcp_mem_reclaim_partial(struct sock *sk) -{ - int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk); - - lockdep_assert_held_once(&sk->sk_lock.slock); - - if (reclaimable > SK_MEM_QUANTUM) - __mptcp_rmem_reclaim(sk, reclaimable - 1); - - sk_mem_reclaim_partial(sk); -} - -static void mptcp_mem_reclaim_partial(struct sock *sk) -{ - mptcp_data_lock(sk); - __mptcp_mem_reclaim_partial(sk); - mptcp_data_unlock(sk); -} - static void dfrag_uncharge(struct sock *sk, int len) { sk_mem_uncharge(sk, len); @@ -1009,7 +986,6 @@ static void __mptcp_clean_una(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - bool cleaned = false; u64 snd_una; /* on fallback we just need to ignore snd_una, as this is really @@ -1032,7 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk) } dfrag_clear(sk, dfrag); - cleaned = true; } dfrag = mptcp_rtx_head(sk); @@ -1054,7 +1029,6 @@ static void __mptcp_clean_una(struct sock *sk) dfrag->already_sent -= delta; dfrag_uncharge(sk, delta); - cleaned = true; } /* all retransmitted data acked, recovery completed */ @@ -1062,9 +1036,6 @@ static void __mptcp_clean_una(struct sock *sk) msk->recovery = false; out: - if (cleaned && tcp_under_memory_pressure(sk)) - __mptcp_mem_reclaim_partial(sk); - if (snd_una == READ_ONCE(msk->snd_nxt) && snd_una == READ_ONCE(msk->write_seq)) { if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) @@ -1216,12 +1187,6 @@ static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, boo { gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation; - if (unlikely(tcp_under_memory_pressure(sk))) { - if (data_lock_held) - __mptcp_mem_reclaim_partial(sk); - else - mptcp_mem_reclaim_partial(sk); - } return __mptcp_alloc_tx_skb(sk, ssk, gfp); } @@ -3464,7 +3429,10 @@ static struct proto mptcp_prot = { .get_port = mptcp_get_port, .forward_alloc_get = mptcp_forward_alloc_get, .sockets_allocated = &mptcp_sockets_allocated, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .memory_pressure = &tcp_memory_pressure, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 480c5320b86e..5d6043c16b09 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -83,7 +83,6 @@ /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) -#define MPTCPOPT_HMAC_LEN 20 #define MPTCPOPT_THMAC_LEN 8 /* MPTCP MP_CAPABLE flags */ @@ -283,6 +282,7 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, nodelay:1; @@ -608,7 +608,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); -void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 63e8892ec807..d4b16d033978 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1634,7 +1634,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) /* the newly created socket really belongs to the owning MPTCP master * socket, even if for additional subflows the allocation is performed * by a kernel workqueue. Adjust inode references, so that the - * procfs/diag interaces really show this one belonging to the correct + * procfs/diag interfaces really show this one belonging to the correct * user. */ SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino; |