Diffstat (limited to 'net/ipv4')
-rw-r--r--   net/ipv4/inet_connection_sock.c    1
-rw-r--r--   net/ipv4/nexthop.c                82
-rw-r--r--   net/ipv4/tcp.c                    70
3 files changed, 113 insertions, 40 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f40b1b72f979..afaf582a5aa9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -902,6 +902,7 @@ void inet_csk_prepare_forced_close(struct sock *sk)
 	bh_unlock_sock(sk);
 	sock_put(sk);
 	inet_csk_prepare_for_destroy_sock(sk);
+	inet_sk(sk)->inet_num = 0;
 }
 EXPORT_SYMBOL(inet_csk_prepare_forced_close);
 
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 400a9f89ebdb..cc8049b100b2 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -247,12 +247,11 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
 	if (nla_put_u32(skb, NHA_ID, nh->id))
 		goto nla_put_failure;
 
-	if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
-		goto nla_put_failure;
-
 	if (nh->is_group) {
 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 
+		if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
+			goto nla_put_failure;
 		if (nla_put_nh_group(skb, nhg))
 			goto nla_put_failure;
 		goto out;
@@ -264,7 +263,10 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
 		if (nla_put_flag(skb, NHA_BLACKHOLE))
 			goto nla_put_failure;
 		goto out;
-	} else if (!nh->is_fdb_nh) {
+	} else if (nhi->fdb_nh) {
+		if (nla_put_flag(skb, NHA_FDB))
+			goto nla_put_failure;
+	} else {
 		const struct net_device *dev;
 
 		dev = nhi->fib_nhc.nhc_dev;
@@ -385,7 +387,7 @@ errout:
 }
 
 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
-			   struct netlink_ext_ack *extack)
+			   bool *is_fdb, struct netlink_ext_ack *extack)
 {
 	if (nh->is_group) {
 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
@@ -398,6 +400,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 				       "Multipath group can not be a nexthop within a group");
 			return false;
 		}
+		*is_fdb = nhg->fdb_nh;
 	} else {
 		struct nh_info *nhi = rtnl_dereference(nh->nh_info);
 
@@ -406,6 +409,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 				       "Blackhole nexthop can not be used in a group with more than 1 path");
 			return false;
 		}
+		*is_fdb = nhi->fdb_nh;
 	}
 
 	return true;
@@ -416,12 +420,13 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
 {
 	struct nh_info *nhi;
 
-	if (!nh->is_fdb_nh) {
+	nhi = rtnl_dereference(nh->nh_info);
+
+	if (!nhi->fdb_nh) {
 		NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
 		return -EINVAL;
 	}
 
-	nhi = rtnl_dereference(nh->nh_info);
 	if (*nh_family == AF_UNSPEC) {
 		*nh_family = nhi->family;
 	} else if (*nh_family != nhi->family) {
@@ -473,19 +478,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
 	nhg = nla_data(tb[NHA_GROUP]);
 	for (i = 0; i < len; ++i) {
 		struct nexthop *nh;
+		bool is_fdb_nh;
 
 		nh = nexthop_find_by_id(net, nhg[i].id);
 		if (!nh) {
 			NL_SET_ERR_MSG(extack, "Invalid nexthop id");
 			return -EINVAL;
 		}
-		if (!valid_group_nh(nh, len, extack))
+		if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
 			return -EINVAL;
 
 		if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
 			return -EINVAL;
 
-		if (!nhg_fdb && nh->is_fdb_nh) {
+		if (!nhg_fdb && is_fdb_nh) {
 			NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
 			return -EINVAL;
 		}
@@ -553,13 +559,13 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 		if (hash > atomic_read(&nhge->upper_bound))
 			continue;
 
-		if (nhge->nh->is_fdb_nh)
+		nhi = rcu_dereference(nhge->nh->nh_info);
+		if (nhi->fdb_nh)
 			return nhge->nh;
 
 		/* nexthops always check if it is good and does
 		 * not rely on a sysctl for this behavior
 		 */
-		nhi = rcu_dereference(nhge->nh->nh_info);
 		switch (nhi->family) {
 		case AF_INET:
 			if (ipv4_good_nh(&nhi->fib_nh))
@@ -624,11 +630,7 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
 		       struct netlink_ext_ack *extack)
 {
 	struct nh_info *nhi;
-
-	if (nh->is_fdb_nh) {
-		NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
-		return -EINVAL;
-	}
+	bool is_fdb_nh;
 
 	/* fib6_src is unique to a fib6_info and limits the ability to cache
 	 * routes in fib6_nh within a nexthop that is potentially shared
@@ -645,10 +647,17 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
 		nhg = rtnl_dereference(nh->nh_grp);
 		if (nhg->has_v4)
 			goto no_v4_nh;
+		is_fdb_nh = nhg->fdb_nh;
 	} else {
 		nhi = rtnl_dereference(nh->nh_info);
 		if (nhi->family == AF_INET)
 			goto no_v4_nh;
+		is_fdb_nh = nhi->fdb_nh;
+	}
+
+	if (is_fdb_nh) {
+		NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+		return -EINVAL;
 	}
 
 	return 0;
@@ -677,12 +686,9 @@ static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
 	return fib6_check_nexthop(new, NULL, extack);
 }
 
-static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
 			       struct netlink_ext_ack *extack)
 {
-	struct nh_info *nhi;
-
-	nhi = rtnl_dereference(nh->nh_info);
 	if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
 		NL_SET_ERR_MSG(extack,
 			       "Route with host scope can not have a gateway");
@@ -704,29 +710,38 @@ static int nexthop_check_scope(struct nexthop *nh, u8 scope,
 int fib_check_nexthop(struct nexthop *nh, u8 scope,
 		      struct netlink_ext_ack *extack)
 {
+	struct nh_info *nhi;
 	int err = 0;
 
-	if (nh->is_fdb_nh) {
-		NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
-		err = -EINVAL;
-		goto out;
-	}
-
 	if (nh->is_group) {
 		struct nh_group *nhg;
 
+		nhg = rtnl_dereference(nh->nh_grp);
+		if (nhg->fdb_nh) {
+			NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+			err = -EINVAL;
+			goto out;
+		}
+
 		if (scope == RT_SCOPE_HOST) {
 			NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
 			err = -EINVAL;
 			goto out;
 		}
 
-		nhg = rtnl_dereference(nh->nh_grp);
 		/* all nexthops in a group have the same scope */
-		err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+		nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
+		err = nexthop_check_scope(nhi, scope, extack);
 	} else {
-		err = nexthop_check_scope(nh, scope, extack);
+		nhi = rtnl_dereference(nh->nh_info);
+		if (nhi->fdb_nh) {
+			NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+			err = -EINVAL;
+			goto out;
+		}
+		err = nexthop_check_scope(nhi, scope, extack);
 	}
+
 out:
 	return err;
 }
@@ -787,6 +802,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 
 	newg->has_v4 = nhg->has_v4;
 	newg->mpath = nhg->mpath;
+	newg->fdb_nh = nhg->fdb_nh;
 	newg->num_nh = nhg->num_nh;
 
 	/* copy old entries to new except the one getting removed */
@@ -1216,7 +1232,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	}
 
 	if (cfg->nh_fdb)
-		nh->is_fdb_nh = 1;
+		nhg->fdb_nh = 1;
 
 	rcu_assign_pointer(nh->nh_grp, nhg);
 
@@ -1255,7 +1271,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
 		goto out;
 	}
 
-	if (nh->is_fdb_nh)
+	if (nhi->fdb_nh)
 		goto out;
 
 	/* sets nh_dev if successful */
@@ -1326,7 +1342,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
 	nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
 
 	if (cfg->nh_fdb)
-		nh->is_fdb_nh = 1;
+		nhi->fdb_nh = 1;
 
 	if (cfg->nh_blackhole) {
 		nhi->reject_nh = 1;
@@ -1349,7 +1365,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
 	}
 
 	/* add the entry to the device based hash */
-	if (!nh->is_fdb_nh)
+	if (!nhi->fdb_nh)
 		nexthop_devhash_add(net, nhi);
 
 	rcu_assign_pointer(nh->nh_info, nhi);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 15d47d5e7951..ecbba0abd3e5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1742,14 +1742,48 @@ int tcp_mmap(struct file *file, struct socket *sock,
 }
 EXPORT_SYMBOL(tcp_mmap);
 
+static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
+					struct page **pages,
+					unsigned long pages_to_map,
+					unsigned long *insert_addr,
+					u32 *length_with_pending,
+					u32 *seq,
+					struct tcp_zerocopy_receive *zc)
+{
+	unsigned long pages_remaining = pages_to_map;
+	int bytes_mapped;
+	int ret;
+
+	ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
+	bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
+	/* Even if vm_insert_pages fails, it may have partially succeeded in
+	 * mapping (some but not all of the pages).
+	 */
+	*seq += bytes_mapped;
+	*insert_addr += bytes_mapped;
+	if (ret) {
+		/* But if vm_insert_pages did fail, we have to unroll some state
+		 * we speculatively touched before.
+		 */
+		const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
+		*length_with_pending -= bytes_not_mapped;
+		zc->recv_skip_hint += bytes_not_mapped;
+	}
+	return ret;
+}
+
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc)
 {
 	unsigned long address = (unsigned long)zc->address;
 	u32 length = 0, seq, offset, zap_len;
+	#define PAGE_BATCH_SIZE 8
+	struct page *pages[PAGE_BATCH_SIZE];
 	const skb_frag_t *frags = NULL;
 	struct vm_area_struct *vma;
 	struct sk_buff *skb = NULL;
+	unsigned long pg_idx = 0;
+	unsigned long curr_addr;
 	struct tcp_sock *tp;
 	int inq;
 	int ret;
@@ -1762,6 +1796,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
 
 	sock_rps_record_flow(sk);
 
+	tp = tcp_sk(sk);
+
 	down_read(&current->mm->mmap_sem);
 
 	vma = find_vma(current->mm, address);
@@ -1771,7 +1807,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	}
 	zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
 
-	tp = tcp_sk(sk);
 	seq = tp->copied_seq;
 	inq = tcp_inq(sk);
 	zc->length = min_t(u32, zc->length, inq);
@@ -1783,8 +1818,20 @@ static int tcp_zerocopy_receive(struct sock *sk,
 		zc->recv_skip_hint = zc->length;
 	}
 	ret = 0;
+	curr_addr = address;
 	while (length + PAGE_SIZE <= zc->length) {
 		if (zc->recv_skip_hint < PAGE_SIZE) {
+			/* If we're here, finish the current batch. */
+			if (pg_idx) {
+				ret = tcp_zerocopy_vm_insert_batch(vma, pages,
+								   pg_idx,
+								   &curr_addr,
+								   &length,
+								   &seq, zc);
+				if (ret)
+					goto out;
+				pg_idx = 0;
+			}
 			if (skb) {
 				if (zc->recv_skip_hint > 0)
 					break;
@@ -1793,7 +1840,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
 			} else {
 				skb = tcp_recv_skb(sk, seq, &offset);
 			}
-
 			zc->recv_skip_hint = skb->len - offset;
 			offset -= skb_headlen(skb);
 			if ((int)offset < 0 || skb_has_frag_list(skb))
@@ -1817,14 +1863,24 @@ static int tcp_zerocopy_receive(struct sock *sk,
 			zc->recv_skip_hint -= remaining;
 			break;
 		}
-		ret = vm_insert_page(vma, address + length,
-				     skb_frag_page(frags));
-		if (ret)
-			break;
+		pages[pg_idx] = skb_frag_page(frags);
+		pg_idx++;
 		length += PAGE_SIZE;
-		seq += PAGE_SIZE;
 		zc->recv_skip_hint -= PAGE_SIZE;
 		frags++;
+		if (pg_idx == PAGE_BATCH_SIZE) {
+			ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+							   &curr_addr, &length,
+							   &seq, zc);
+			if (ret)
+				goto out;
+			pg_idx = 0;
+		}
+	}
+	if (pg_idx) {
+		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
						   &curr_addr, &length, &seq,
+						   zc);
 	}
 out:
 	up_read(&current->mm->mmap_sem);
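For context on the tcp.c hunks above: the patch replaces per-page vm_insert_page() calls with batched vm_insert_pages(), advances the receive state by however many pages actually got mapped, and rolls back the speculative accounting for the pages that did not. Below is a minimal, self-contained userspace sketch of only that partial-success accounting; fake_insert_pages() is a hypothetical stand-in for vm_insert_pages(), and the sketch is illustrative, not kernel code.

/* Illustrative sketch (userspace): "map a batch, then reconcile partial
 * success" accounting, mirroring tcp_zerocopy_vm_insert_batch() above.
 * fake_insert_pages() is a made-up stand-in that maps a prefix of the
 * batch and reports how many pages were left unmapped.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_BATCH_SIZE	8UL

/* Pretend only the first five pages of the batch could be mapped. */
static int fake_insert_pages(unsigned long *pages_remaining)
{
	unsigned long mapped = 5;

	if (mapped >= *pages_remaining) {
		*pages_remaining = 0;
		return 0;
	}
	*pages_remaining -= mapped;
	return -1;	/* error, but `mapped` pages did stick */
}

int main(void)
{
	unsigned long pages_to_map = PAGE_BATCH_SIZE;
	unsigned long pages_remaining = pages_to_map;
	unsigned long insert_addr = 0x700000000000UL;
	unsigned long seq = 1000;
	unsigned long length = pages_to_map * PAGE_SIZE;
	unsigned long recv_skip_hint = 0;
	int ret = fake_insert_pages(&pages_remaining);
	unsigned long bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);

	/* Advance state for whatever did get mapped... */
	seq += bytes_mapped;
	insert_addr += bytes_mapped;
	if (ret) {
		/* ...and unroll the speculative accounting for the rest. */
		unsigned long bytes_not_mapped = PAGE_SIZE * pages_remaining;

		length -= bytes_not_mapped;
		recv_skip_hint += bytes_not_mapped;
	}
	printf("mapped %lu bytes, next addr %#lx, length %lu, recv_skip_hint %lu\n",
	       bytes_mapped, insert_addr, length, recv_skip_hint);
	return ret ? 1 : 0;
}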