diff options
Diffstat (limited to 'net/ipv4')
38 files changed, 767 insertions, 440 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 10e3751466b5..c8f7aee587d1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -350,7 +350,7 @@ lookup_protocol: err = 0; sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) - sk->sk_reuse = 1; + sk->sk_reuse = SK_CAN_REUSE; inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; @@ -541,7 +541,7 @@ out: } EXPORT_SYMBOL(inet_bind); -int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, +int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index fd508b526014..3a280756dd73 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -77,7 +77,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) { - unsigned char * optptr = (unsigned char*)(iph+1); + unsigned char *optptr = (unsigned char *)(iph+1); int l = iph->ihl*4 - sizeof(struct iphdr); int optlen; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 18d9b81ecb1a..373b56bf8f49 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1059,7 +1059,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); err = PTR_ERR(neigh); if (!IS_ERR(neigh)) { - unsigned state = NUD_STALE; + unsigned int state = NUD_STALE; if (r->arp_flags & ATF_PERM) state = NUD_PERMANENT; err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? @@ -1071,7 +1071,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, return err; } -static unsigned arp_state_to_flags(struct neighbour *neigh) +static unsigned int arp_state_to_flags(struct neighbour *neigh) { if (neigh->nud_state&NUD_PERMANENT) return ATF_PERM | ATF_COM; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6e447ff94dfa..88c9e3f68c78 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1125,7 +1125,7 @@ skip: } } -static inline bool inetdev_valid_mtu(unsigned mtu) +static inline bool inetdev_valid_mtu(unsigned int mtu) { return mtu >= 68; } @@ -1266,17 +1266,15 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; - if (ifa->ifa_address) - NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); - - if (ifa->ifa_local) - NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); - - if (ifa->ifa_broadcast) - NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); - - if (ifa->ifa_label[0]) - NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); + if ((ifa->ifa_address && + nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || + (ifa->ifa_local && + nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) || + (ifa->ifa_broadcast && + nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || + (ifa->ifa_label[0] && + nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1587,7 +1585,6 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; - char *dev_name; } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", @@ -1629,16 +1626,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, { int i; struct devinet_sysctl_table *t; - -#define DEVINET_CTL_PATH_DEV 3 - - struct ctl_path devinet_ctl_path[] = { - { .procname = "net", }, - { .procname = "ipv4", }, - { .procname = "conf", }, - { /* to be set */ }, - { }, - }; + char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); if (!t) @@ -1650,27 +1638,15 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, t->devinet_vars[i].extra2 = net; } - /* - * Make a copy of dev_name, because '.procname' is regarded as const - * by sysctl and we wouldn't want anyone to change it under our feet - * (see SIOCSIFNAME). - */ - t->dev_name = kstrdup(dev_name, GFP_KERNEL); - if (!t->dev_name) - goto free; - - devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; + snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); - t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, - t->devinet_vars); + t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); if (!t->sysctl_header) - goto free_procname; + goto free; p->sysctl = t; return 0; -free_procname: - kfree(t->dev_name); free: kfree(t); out: @@ -1686,7 +1662,6 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) cnf->sysctl = NULL; unregister_net_sysctl_table(t->sysctl_header); - kfree(t->dev_name); kfree(t); } @@ -1716,12 +1691,6 @@ static struct ctl_table ctl_forward_entry[] = { }, { }, }; - -static __net_initdata struct ctl_path net_ipv4_path[] = { - { .procname = "net", }, - { .procname = "ipv4", }, - { }, -}; #endif static __net_init int devinet_init_net(struct net *net) @@ -1767,7 +1736,7 @@ static __net_init int devinet_init_net(struct net *net) goto err_reg_dflt; err = -ENOMEM; - forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); + forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); if (forw_hdr == NULL) goto err_reg_ctl; net->ipv4.forw_hdr = forw_hdr; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cbe3a68507cf..3854411fa37c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -136,13 +136,13 @@ static void fib_flush(struct net *net) * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. */ -static inline unsigned __inet_dev_addr_type(struct net *net, - const struct net_device *dev, - __be32 addr) +static inline unsigned int __inet_dev_addr_type(struct net *net, + const struct net_device *dev, + __be32 addr) { struct flowi4 fl4 = { .daddr = addr }; struct fib_result res; - unsigned ret = RTN_BROADCAST; + unsigned int ret = RTN_BROADCAST; struct fib_table *local_table; if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) @@ -740,7 +740,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) #define BRD_OK 2 #define BRD0_OK 4 #define BRD1_OK 8 - unsigned ok = 0; + unsigned int ok = 0; int subnet = 0; /* Primary network */ int gone = 1; /* Address is missing */ int same_prefsrc = 0; /* Another primary with same IP */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 799fc790b3cf..2d043f71ef70 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -221,15 +221,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->src_len = rule4->src_len; frh->tos = rule4->tos; - if (rule4->dst_len) - NLA_PUT_BE32(skb, FRA_DST, rule4->dst); - - if (rule4->src_len) - NLA_PUT_BE32(skb, FRA_SRC, rule4->src); - + if ((rule4->dst_len && + nla_put_be32(skb, FRA_DST, rule4->dst)) || + (rule4->src_len && + nla_put_be32(skb, FRA_SRC, rule4->src))) + goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID - if (rule4->tclassid) - NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); + if (rule4->tclassid && + nla_put_u32(skb, FRA_FLOW, rule4->tclassid)) + goto nla_put_failure; #endif return 0; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5063fa38ac7b..a8bdf7405433 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -931,33 +931,36 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_table = tb_id; else rtm->rtm_table = RT_TABLE_COMPAT; - NLA_PUT_U32(skb, RTA_TABLE, tb_id); + if (nla_put_u32(skb, RTA_TABLE, tb_id)) + goto nla_put_failure; rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = fi->fib_scope; rtm->rtm_protocol = fi->fib_protocol; - if (rtm->rtm_dst_len) - NLA_PUT_BE32(skb, RTA_DST, dst); - - if (fi->fib_priority) - NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); - + if (rtm->rtm_dst_len && + nla_put_be32(skb, RTA_DST, dst)) + goto nla_put_failure; + if (fi->fib_priority && + nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) + goto nla_put_failure; if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) goto nla_put_failure; - if (fi->fib_prefsrc) - NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); - + if (fi->fib_prefsrc && + nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc)) + goto nla_put_failure; if (fi->fib_nhs == 1) { - if (fi->fib_nh->nh_gw) - NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); - - if (fi->fib_nh->nh_oif) - NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); + if (fi->fib_nh->nh_gw && + nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) + goto nla_put_failure; + if (fi->fib_nh->nh_oif && + nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) + goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID - if (fi->fib_nh[0].nh_tclassid) - NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); + if (fi->fib_nh[0].nh_tclassid && + nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) + goto nla_put_failure; #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -978,11 +981,13 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtnh->rtnh_hops = nh->nh_weight - 1; rtnh->rtnh_ifindex = nh->nh_oif; - if (nh->nh_gw) - NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); + if (nh->nh_gw && + nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw)) + goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID - if (nh->nh_tclassid) - NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); + if (nh->nh_tclassid && + nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) + goto nla_put_failure; #endif /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5dfecfd7d5e9..6699f23e6f55 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -344,10 +344,10 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(pip, &rt->dst, NULL); - ((u8*)&pip[1])[0] = IPOPT_RA; - ((u8*)&pip[1])[1] = 4; - ((u8*)&pip[1])[2] = 0; - ((u8*)&pip[1])[3] = 0; + ((u8 *)&pip[1])[0] = IPOPT_RA; + ((u8 *)&pip[1])[1] = 4; + ((u8 *)&pip[1])[2] = 0; + ((u8 *)&pip[1])[3] = 0; skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; skb_put(skb, sizeof(*pig)); @@ -688,10 +688,10 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; ip_select_ident(iph, &rt->dst, NULL); - ((u8*)&iph[1])[0] = IPOPT_RA; - ((u8*)&iph[1])[1] = 4; - ((u8*)&iph[1])[2] = 0; - ((u8*)&iph[1])[3] = 0; + ((u8 *)&iph[1])[0] = IPOPT_RA; + ((u8 *)&iph[1])[1] = 4; + ((u8 *)&iph[1])[2] = 0; + ((u8 *)&iph[1])[3] = 0; ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); ih->type = type; @@ -774,7 +774,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) - continue; + break; if (srcs[i] == psf->sf_inaddr) { scount++; break; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 19d66cefd7d3..95e61596e605 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -42,7 +42,8 @@ EXPORT_SYMBOL(sysctl_local_reserved_ports); void inet_get_local_port_range(int *low, int *high) { - unsigned seq; + unsigned int seq; + do { seq = read_seqbegin(&sysctl_local_ports.lock); @@ -53,7 +54,7 @@ void inet_get_local_port_range(int *low, int *high) EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb) + const struct inet_bind_bucket *tb, bool relax) { struct sock *sk2; struct hlist_node *node; @@ -79,6 +80,14 @@ int inet_csk_bind_conflict(const struct sock *sk, sk2_rcv_saddr == sk_rcv_saddr(sk)) break; } + if (!relax && reuse && sk2->sk_reuse && + sk2->sk_state != TCP_LISTEN) { + const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); + + if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || + sk2_rcv_saddr == sk_rcv_saddr(sk)) + break; + } } } return node != NULL; @@ -122,12 +131,13 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { + if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && + !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { snum = smallest_rover; goto tb_found; } } - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { snum = rover; goto tb_found; } @@ -172,18 +182,22 @@ have_snum: goto tb_not_found; tb_found: if (!hlist_empty(&tb->owners)) { + if (sk->sk_reuse == SK_FORCE_REUSE) + goto success; + if (tb->fastreuse > 0 && sk->sk_reuse && sk->sk_state != TCP_LISTEN && smallest_size == -1) { goto success; } else { ret = 1; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; } + goto fail_unlock; } } @@ -514,7 +528,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. - * If synack was not acknowledged for 3 seconds, it means + * If synack was not acknowledged for 1 second, it means * one of the following things: synack was lost, ack was lost, * rtt is high or nobody planned to ack (i.e. synflood). * When server is a bit loaded, queue is populated with old @@ -555,8 +569,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept, &expire, &resend); - if (req->rsk_ops->syn_ack_timeout) - req->rsk_ops->syn_ack_timeout(parent, req); + req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 984ec656b03b..7880af970208 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -217,7 +217,7 @@ begin: } EXPORT_SYMBOL_GPL(__inet_lookup_listener); -struct sock * __inet_lookup_established(struct net *net, +struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 89168c6351ff..543ef6225458 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -263,7 +263,7 @@ rescan: void inet_twdr_hangman(unsigned long data) { struct inet_timewait_death_row *twdr; - int unsigned need_timer; + unsigned int need_timer; twdr = (struct inet_timewait_death_row *)data; spin_lock(&twdr->death_lock); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 29a07b6c7168..e5c44fc586ab 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -41,7 +41,7 @@ static int ip_forward_finish(struct sk_buff *skb) { - struct ip_options * opt = &(IPCB(skb)->opt); + struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); @@ -55,7 +55,7 @@ int ip_forward(struct sk_buff *skb) { struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ - struct ip_options * opt = &(IPCB(skb)->opt); + struct ip_options *opt = &(IPCB(skb)->opt); if (skb_warn_if_lro(skb)) goto drop; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 3727e234c884..71e5c328176c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -569,7 +569,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, skb_morph(head, qp->q.fragments); head->next = qp->q.fragments->next; - kfree_skb(qp->q.fragments); + consume_skb(qp->q.fragments); qp->q.fragments = head; } @@ -782,7 +782,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) table[2].data = &net->ipv4.frags.timeout; } - hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); + hdr = register_net_sysctl(net, "net/ipv4", table); if (hdr == NULL) goto err_reg; @@ -807,7 +807,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) static void ip4_frags_ctl_register(void) { - register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); + register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table); } #else static inline int ip4_frags_ns_ctl_register(struct net *net) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b57532d4742c..f49047b79609 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -169,37 +169,56 @@ struct ipgre_net { /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_tstats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; -} __attribute__((aligned(4*sizeof(unsigned long)))); + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; -static struct net_device_stats *ipgre_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { - struct pcpu_tstats sum = { 0 }; int i; for_each_possible_cpu(i) { const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; + + tot->multicast = dev->stats.multicast; + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + return tot; } /* Given src, dst and key, find appropriate for input tunnel. */ -static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, - __be32 remote, __be32 local, - __be32 key, __be16 gre_proto) +static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, + __be32 remote, __be32 local, + __be32 key, __be16 gre_proto) { struct net *net = dev_net(dev); int link = dev->ifindex; @@ -464,7 +483,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) */ const struct iphdr *iph = (const struct iphdr *)skb->data; - __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); + __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -574,7 +593,7 @@ static int ipgre_rcv(struct sk_buff *skb) iph = ip_hdr(skb); h = skb->data; - flags = *(__be16*)h; + flags = *(__be16 *)h; if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { /* - Version must be 0. @@ -598,11 +617,11 @@ static int ipgre_rcv(struct sk_buff *skb) offset += 4; } if (flags&GRE_KEY) { - key = *(__be32*)(h + offset); + key = *(__be32 *)(h + offset); offset += 4; } if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32*)(h + offset)); + seqno = ntohl(*(__be32 *)(h + offset)); offset += 4; } } @@ -672,8 +691,10 @@ static int ipgre_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); __skb_tunnel_rx(skb, tunnel->dev); @@ -900,7 +921,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev htons(ETH_P_TEB) : skb->protocol; if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); + __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); if (tunnel->parms.o_flags&GRE_SEQ) { ++tunnel->o_seqno; @@ -913,7 +934,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } if (tunnel->parms.o_flags&GRE_CSUM) { *ptr = 0; - *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); + *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr)); } } @@ -1169,7 +1190,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, { struct ip_tunnel *t = netdev_priv(dev); struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); - __be16 *p = (__be16*)(iph+1); + __be16 *p = (__be16 *)(iph+1); memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); p[0] = t->parms.o_flags; @@ -1253,7 +1274,7 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_start_xmit = ipgre_tunnel_xmit, .ndo_do_ioctl = ipgre_tunnel_ioctl, .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats = ipgre_get_stats, + .ndo_get_stats64 = ipgre_get_stats64, }; static void ipgre_dev_free(struct net_device *dev) @@ -1507,7 +1528,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats = ipgre_get_stats, + .ndo_get_stats64 = ipgre_get_stats64, }; static void ipgre_tap_setup(struct net_device *dev) @@ -1654,17 +1675,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; - NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); - NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); - NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); - NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); - NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); - NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); - NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); - NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); - NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); - NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); - + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || + nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) || + nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || + nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || + nla_put_u8(skb, IFLA_GRE_PMTUDISC, + !!(p->iph.frag_off & htons(IP_DF)))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index a0d0d9d9b870..95722ed0e5bb 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -210,10 +210,10 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) * Simple and stupid 8), but the most efficient way. */ -void ip_options_fragment(struct sk_buff * skb) +void ip_options_fragment(struct sk_buff *skb) { unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr); - struct ip_options * opt = &(IPCB(skb)->opt); + struct ip_options *opt = &(IPCB(skb)->opt); int l = opt->optlen; int optlen; @@ -248,13 +248,13 @@ void ip_options_fragment(struct sk_buff * skb) */ int ip_options_compile(struct net *net, - struct ip_options * opt, struct sk_buff * skb) + struct ip_options *opt, struct sk_buff *skb) { int l; - unsigned char * iph; - unsigned char * optptr; + unsigned char *iph; + unsigned char *optptr; int optlen; - unsigned char * pp_ptr = NULL; + unsigned char *pp_ptr = NULL; struct rtable *rt = NULL; if (skb != NULL) { @@ -413,7 +413,7 @@ int ip_options_compile(struct net *net, opt->is_changed = 1; } } else { - unsigned overflow = optptr[3]>>4; + unsigned int overflow = optptr[3]>>4; if (overflow == 15) { pp_ptr = optptr + 3; goto error; @@ -473,20 +473,20 @@ EXPORT_SYMBOL(ip_options_compile); * Undo all the changes done by ip_options_compile(). */ -void ip_options_undo(struct ip_options * opt) +void ip_options_undo(struct ip_options *opt) { if (opt->srr) { - unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr); + unsigned char *optptr = opt->__data+opt->srr-sizeof(struct iphdr); memmove(optptr+7, optptr+3, optptr[1]-7); memcpy(optptr+3, &opt->faddr, 4); } if (opt->rr_needaddr) { - unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr); + unsigned char *optptr = opt->__data+opt->rr-sizeof(struct iphdr); optptr[2] -= 4; memset(&optptr[optptr[2]-1], 0, 4); } if (opt->ts) { - unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr); + unsigned char *optptr = opt->__data+opt->ts-sizeof(struct iphdr); if (opt->ts_needtime) { optptr[2] -= 4; memset(&optptr[optptr[2]-1], 0, 4); @@ -549,8 +549,8 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp, void ip_forward_options(struct sk_buff *skb) { - struct ip_options * opt = &(IPCB(skb)->opt); - unsigned char * optptr; + struct ip_options *opt = &(IPCB(skb)->opt); + unsigned char *optptr; struct rtable *rt = skb_rtable(skb); unsigned char *raw = skb_network_header(skb); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2fd0fba77124..51c6c672c8aa 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -90,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; - struct ip_options * opt = (struct ip_options *)optbuf; + struct ip_options *opt = (struct ip_options *)optbuf; if (IPCB(skb)->opt.optlen == 0) return; @@ -147,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(skb->sk); - unsigned flags = inet->cmsg_flags; + unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ if (flags & 1) @@ -1094,7 +1094,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); */ static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen, unsigned flags) + char __user *optval, int __user *optlen, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); int val; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 92ac7e7363a0..f267280d8709 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1198,7 +1198,7 @@ static int __init ic_dynamic(void) d = ic_first_dev; retries = CONF_SEND_RETRIES; get_random_bytes(&timeout, sizeof(timeout)); - timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); + timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { /* Track the device we are configuring */ ic_dev_xid = d->xid; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ae1413e3f2f8..2d0f99bf61b3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -144,33 +144,48 @@ static void ipip_dev_free(struct net_device *dev); /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_tstats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; -} __attribute__((aligned(4*sizeof(unsigned long)))); + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; -static struct net_device_stats *ipip_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { - struct pcpu_tstats sum = { 0 }; int i; for_each_possible_cpu(i) { const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + tot->collisions = dev->stats.collisions; + + return tot; } -static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, +static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, __be32 remote, __be32 local) { unsigned int h0 = HASH(remote); @@ -245,7 +260,7 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) rcu_assign_pointer(*tp, t); } -static struct ip_tunnel * ipip_tunnel_locate(struct net *net, +static struct ip_tunnel *ipip_tunnel_locate(struct net *net, struct ip_tunnel_parm *parms, int create) { __be32 remote = parms->iph.daddr; @@ -404,8 +419,10 @@ static int ipip_rcv(struct sk_buff *skb) skb->pkt_type = PACKET_HOST; tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); __skb_tunnel_rx(skb, tunnel->dev); @@ -730,7 +747,7 @@ static const struct net_device_ops ipip_netdev_ops = { .ndo_start_xmit = ipip_tunnel_xmit, .ndo_do_ioctl = ipip_tunnel_ioctl, .ndo_change_mtu = ipip_tunnel_change_mtu, - .ndo_get_stats = ipip_get_stats, + .ndo_get_stats64 = ipip_get_stats64, }; static void ipip_dev_free(struct net_device *dev) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 960fbfc3e976..5bef604ac0fa 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2119,15 +2119,16 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rtm->rtm_src_len = 32; rtm->rtm_tos = 0; rtm->rtm_table = mrt->id; - NLA_PUT_U32(skb, RTA_TABLE, mrt->id); + if (nla_put_u32(skb, RTA_TABLE, mrt->id)) + goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = RTPROT_UNSPEC; rtm->rtm_flags = 0; - NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); - NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); - + if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || + nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) + goto nla_put_failure; if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) goto nla_put_failure; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4f47e064e262..ed1b36783192 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -12,7 +12,7 @@ #include <net/netfilter/nf_queue.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) +int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) { struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); @@ -237,13 +237,3 @@ static void ipv4_netfilter_fini(void) module_init(ipv4_netfilter_init); module_exit(ipv4_netfilter_fini); - -#ifdef CONFIG_SYSCTL -struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { - { .procname = "net", }, - { .procname = "ipv4", }, - { .procname = "netfilter", }, - { } -}; -EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); -#endif /* CONFIG_SYSCTL */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fd7a3f68917f..a3935273869f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -303,7 +303,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { - verdict = (unsigned)(-v) - 1; + verdict = (unsigned int)(-v) - 1; break; } e = back; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 94d45e1f8882..09775a1e1348 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -586,7 +586,7 @@ static int __init ip_queue_init(void) #endif register_netdevice_notifier(&ipq_dev_notifier); #ifdef CONFIG_SYSCTL - ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); + ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table); #endif status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh); if (status < 0) { @@ -597,7 +597,7 @@ static int __init ip_queue_init(void) cleanup_sysctl: #ifdef CONFIG_SYSCTL - unregister_sysctl_table(ipq_sysctl_header); + unregister_net_sysctl_table(ipq_sysctl_header); #endif unregister_netdevice_notifier(&ipq_dev_notifier); proc_net_remove(&init_net, IPQ_PROC_FS_NAME); @@ -618,7 +618,7 @@ static void __exit ip_queue_fini(void) ipq_flush(NULL, 0); #ifdef CONFIG_SYSCTL - unregister_sysctl_table(ipq_sysctl_header); + unregister_net_sysctl_table(ipq_sysctl_header); #endif unregister_netdevice_notifier(&ipq_dev_notifier); proc_net_remove(&init_net, IPQ_PROC_FS_NAME); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 24e556e83a3b..585b80f3cc68 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -377,7 +377,7 @@ ipt_do_table(struct sk_buff *skb, if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { - verdict = (unsigned)(-v) - 1; + verdict = (unsigned int)(-v) - 1; break; } if (*stackptr <= origptr) { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index cf73cc70ed2d..91747d4ebc26 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -311,8 +311,9 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip); - NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip); + if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || + nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) + goto nla_put_failure; return 0; nla_put_failure: @@ -364,7 +365,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nla_policy = ipv4_nla_policy, #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, + .ctl_table_path = "net/ipv4/netfilter", .ctl_table = ip_ct_sysctl_table, #endif .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7cbe9cb261c2..0847e373d33c 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -228,10 +228,10 @@ icmp_error(struct net *net, struct nf_conn *tmpl, static int icmp_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id); - NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type); - NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code); - + if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || + nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || + nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) + goto nla_put_failure; return 0; nla_put_failure: @@ -293,8 +293,8 @@ icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeout = data; - NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)); - + if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 57932c43960e..ea4a23813d26 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -283,7 +283,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, __be32 newip; u_int16_t port; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; - unsigned buflen; + unsigned int buflen; /* Connection will come from reply */ if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 50009c787bcd..6e930c7174dd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -51,15 +51,16 @@ static struct ping_table ping_table; static u16 ping_port_rover; -static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) +static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask) { int res = (num + net_hash_mix(net)) & mask; + pr_debug("hash(%d) = %d\n", num, res); return res; } static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, - struct net *net, unsigned num) + struct net *net, unsigned int num) { return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } @@ -188,7 +189,8 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, gid_t *high) { gid_t *data = net->ipv4.sysctl_ping_group_range; - unsigned seq; + unsigned int seq; + do { seq = read_seqbegin(&sysctl_local_ports.lock); @@ -410,7 +412,7 @@ struct pingfakehdr { __wsum wcheck; }; -static int ping_getfrag(void *from, char * to, +static int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bbd604c68e68..4032b818f3e4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -288,7 +288,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) read_unlock(&raw_v4_hashinfo.lock); } -static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { /* Charge it to the socket. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 167ea10b521a..5773f5d9e213 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,6 +109,7 @@ #include <net/rtnetlink.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> +#include <linux/kmemleak.h> #endif #include <net/secure_seq.h> @@ -229,7 +230,7 @@ const __u8 ip_tos2prio[16] = { TC_PRIO_INTERACTIVE_BULK, ECN_OR_COST(INTERACTIVE_BULK) }; - +EXPORT_SYMBOL(ip_tos2prio); /* * Route cache. @@ -296,7 +297,7 @@ static inline void rt_hash_lock_init(void) #endif static struct rt_hash_bucket *rt_hash_table __read_mostly; -static unsigned rt_hash_mask __read_mostly; +static unsigned int rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); @@ -1143,7 +1144,7 @@ static int rt_bind_neighbour(struct rtable *rt) return 0; } -static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, +static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, struct sk_buff *skb, int ifindex) { struct rtable *rth, *cand; @@ -1384,7 +1385,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) } EXPORT_SYMBOL(__ip_select_ident); -static void rt_del(unsigned hash, struct rtable *rt) +static void rt_del(unsigned int hash, struct rtable *rt) { struct rtable __rcu **rthp; struct rtable *aux; @@ -1538,7 +1539,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if (rt->rt_flags & RTCF_REDIRECTED) { - unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, + unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, rt->rt_oif, rt_genid(dev_net(dst->dev))); rt_del(hash, rt); @@ -2215,9 +2216,9 @@ static int ip_mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { - struct rtable* rth = NULL; + struct rtable *rth = NULL; int err; - unsigned hash; + unsigned int hash; #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) @@ -2255,13 +2256,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); struct flowi4 fl4; - unsigned flags = 0; + unsigned int flags = 0; u32 itag = 0; - struct rtable * rth; - unsigned hash; + struct rtable *rth; + unsigned int hash; __be32 spec_dst; int err = -EINVAL; - struct net * net = dev_net(dev); + struct net *net = dev_net(dev); /* IP on this device is disabled. */ @@ -2433,8 +2434,8 @@ martian_source_keep_err: int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, bool noref) { - struct rtable * rth; - unsigned hash; + struct rtable *rth; + unsigned int hash; int iif = dev->ifindex; struct net *net; int res; @@ -2972,7 +2973,8 @@ static int rt_fill_info(struct net *net, r->rtm_src_len = 0; r->rtm_tos = rt->rt_key_tos; r->rtm_table = RT_TABLE_MAIN; - NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); + if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) + goto nla_put_failure; r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; @@ -2980,31 +2982,38 @@ static int rt_fill_info(struct net *net, if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); - + if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) + goto nla_put_failure; if (rt->rt_key_src) { r->rtm_src_len = 32; - NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); + if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src)) + goto nla_put_failure; } - if (rt->dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); + if (rt->dst.dev && + nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) + goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID - if (rt->dst.tclassid) - NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); + if (rt->dst.tclassid && + nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) + goto nla_put_failure; #endif - if (rt_is_input_route(rt)) - NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); - else if (rt->rt_src != rt->rt_key_src) - NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); - - if (rt->rt_dst != rt->rt_gateway) - NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); + if (rt_is_input_route(rt)) { + if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) + goto nla_put_failure; + } else if (rt->rt_src != rt->rt_key_src) { + if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) + goto nla_put_failure; + } + if (rt->rt_dst != rt->rt_gateway && + nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) + goto nla_put_failure; if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (rt->rt_mark) - NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); + if (rt->rt_mark && + nla_put_be32(skb, RTA_MARK, rt->rt_mark)) + goto nla_put_failure; error = rt->dst.error; if (peer) { @@ -3045,7 +3054,8 @@ static int rt_fill_info(struct net *net, } } else #endif - NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); + if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) + goto nla_put_failure; } if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, @@ -3059,7 +3069,7 @@ nla_put_failure: return -EMSGSIZE; } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; @@ -3334,23 +3344,6 @@ static ctl_table ipv4_route_table[] = { { } }; -static struct ctl_table empty[1]; - -static struct ctl_table ipv4_skeleton[] = -{ - { .procname = "route", - .mode = 0555, .child = ipv4_route_table}, - { .procname = "neigh", - .mode = 0555, .child = empty}, - { } -}; - -static __net_initdata struct ctl_path ipv4_path[] = { - { .procname = "net", }, - { .procname = "ipv4", }, - { }, -}; - static struct ctl_table ipv4_route_flush_table[] = { { .procname = "flush", @@ -3361,13 +3354,6 @@ static struct ctl_table ipv4_route_flush_table[] = { { }, }; -static __net_initdata struct ctl_path ipv4_route_path[] = { - { .procname = "net", }, - { .procname = "ipv4", }, - { .procname = "route", }, - { }, -}; - static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; @@ -3380,8 +3366,7 @@ static __net_init int sysctl_route_net_init(struct net *net) } tbl[0].extra1 = net; - net->ipv4.route_hdr = - register_net_sysctl_table(net, ipv4_route_path, tbl); + net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); if (net->ipv4.route_hdr == NULL) goto err_reg; return 0; @@ -3505,6 +3490,6 @@ int __init ip_rt_init(void) */ void __init ip_static_sysctl_init(void) { - register_sysctl_paths(ipv4_path, ipv4_skeleton); + register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); } #endif diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7a7724da9bff..33417f84e07f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -78,7 +78,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) { gid_t *data = table->data; - unsigned seq; + unsigned int seq; do { seq = read_seqbegin(&sysctl_local_ports.lock); @@ -768,13 +768,6 @@ static struct ctl_table ipv4_net_table[] = { { } }; -struct ctl_path net_ipv4_ctl_path[] = { - { .procname = "net", }, - { .procname = "ipv4", }, - { }, -}; -EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); - static __net_init int ipv4_sysctl_init_net(struct net *net) { struct ctl_table *table; @@ -815,8 +808,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) tcp_init_mem(net); - net->ipv4.ipv4_hdr = register_net_sysctl_table(net, - net_ipv4_ctl_path, table); + net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; @@ -857,12 +849,12 @@ static __init int sysctl_ipv4_init(void) if (!i->procname) return -EINVAL; - hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); + hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); if (hdr == NULL) return -ENOMEM; if (register_pernet_subsys(&ipv4_sysctl_ops)) { - unregister_sysctl_table(hdr); + unregister_net_sysctl_table(hdr); return -ENOMEM; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8bb6adeb62c0..bcc4eab5f251 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -363,6 +363,70 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) return period; } +/* Address-family independent initialization for a tcp_sock. + * + * NOTE: A lot of things set to zero explicitly by call to + * sk_alloc() so need not be done here. + */ +void tcp_init_sock(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + skb_queue_head_init(&tp->out_of_order_queue); + tcp_init_xmit_timers(sk); + tcp_prequeue_init(tp); + + icsk->icsk_rto = TCP_TIMEOUT_INIT; + tp->mdev = TCP_TIMEOUT_INIT; + + /* So many TCP implementations out there (incorrectly) count the + * initial SYN frame in their delayed-ACK and congestion control + * algorithms that we must have the following bandaid to talk + * efficiently to them. -DaveM + */ + tp->snd_cwnd = TCP_INIT_CWND; + + /* See draft-stevens-tcpca-spec-01 for discussion of the + * initialization of these values. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + tp->snd_cwnd_clamp = ~0; + tp->mss_cache = TCP_MSS_DEFAULT; + + tp->reordering = sysctl_tcp_reordering; + icsk->icsk_ca_ops = &tcp_init_congestion_ops; + + sk->sk_state = TCP_CLOSE; + + sk->sk_write_space = sk_stream_write_space; + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + icsk->icsk_sync_mss = tcp_sync_mss; + + /* TCP Cookie Transactions */ + if (sysctl_tcp_cookie_size > 0) { + /* Default, cookies without s_data_payload. */ + tp->cookie_values = + kzalloc(sizeof(*tp->cookie_values), + sk->sk_allocation); + if (tp->cookie_values != NULL) + kref_init(&tp->cookie_values->kref); + } + /* Presumed zeroed, in order of appearance: + * cookie_in_always, cookie_out_never, + * s_data_constant, s_data_in, s_data_out + */ + sk->sk_sndbuf = sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = sysctl_tcp_rmem[1]; + + local_bh_disable(); + sock_update_memcg(sk); + sk_sockets_allocated_inc(sk); + local_bh_enable(); +} +EXPORT_SYMBOL(tcp_init_sock); + /* * Wait for a TCP event. * @@ -912,6 +976,39 @@ static inline int select_size(const struct sock *sk, bool sg) return tmp; } +static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct sk_buff *skb; + struct tcp_skb_cb *cb; + struct tcphdr *th; + + skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); + if (!skb) + goto err; + + th = (struct tcphdr *)skb_put(skb, sizeof(*th)); + skb_reset_transport_header(skb); + memset(th, 0, sizeof(*th)); + + if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) + goto err_free; + + cb = TCP_SKB_CB(skb); + + TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; + TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; + + tcp_queue_rcv(sk, skb, sizeof(*th)); + + return size; + +err_free: + kfree_skb(skb); +err: + return -ENOMEM; +} + int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { @@ -919,7 +1016,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int iovlen, flags, err, copied; - int mss_now, size_goal; + int mss_now = 0, size_goal; bool sg; long timeo; @@ -933,6 +1030,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_err; + if (unlikely(tp->repair)) { + if (tp->repair_queue == TCP_RECV_QUEUE) { + copied = tcp_send_rcvq(sk, msg, size); + goto out; + } + + err = -EINVAL; + if (tp->repair_queue == TCP_NO_QUEUE) + goto out_err; + + /* 'common' sending to sendq */ + } + /* This should be in poll */ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -1089,7 +1199,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < max || (flags & MSG_OOB)) + if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) continue; if (forced_push(tp)) { @@ -1102,7 +1212,7 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) + if (copied && likely(!tp->repair)) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) @@ -1113,7 +1223,7 @@ wait_for_memory: } out: - if (copied) + if (copied && likely(!tp->repair)) tcp_push(sk, flags, mss_now, tp->nonagle); release_sock(sk); return copied; @@ -1187,6 +1297,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) return -EAGAIN; } +static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) +{ + struct sk_buff *skb; + int copied = 0, err = 0; + + /* XXX -- need to support SO_PEEK_OFF */ + + skb_queue_walk(&sk->sk_write_queue, skb) { + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len); + if (err) + break; + + copied += skb->len; + } + + return err ?: copied; +} + /* Clean up the receive buffer for full frames taken by the user, * then send an ACK if necessary. COPIED is the number of bytes * tcp_recvmsg has given to the user so far, it speeds up the @@ -1432,6 +1560,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto recv_urg; + if (unlikely(tp->repair)) { + err = -EPERM; + if (!(flags & MSG_PEEK)) + goto out; + + if (tp->repair_queue == TCP_SEND_QUEUE) + goto recv_sndq; + + err = -EINVAL; + if (tp->repair_queue == TCP_NO_QUEUE) + goto out; + + /* 'common' recv queue MSG_PEEK-ing */ + } + seq = &tp->copied_seq; if (flags & MSG_PEEK) { peek_seq = tp->copied_seq; @@ -1783,6 +1926,10 @@ out: recv_urg: err = tcp_recv_urg(sk, msg, len, flags); goto out; + +recv_sndq: + err = tcp_peek_sndq(sk, msg, len); + goto out; } EXPORT_SYMBOL(tcp_recvmsg); @@ -1935,7 +2082,9 @@ void tcp_close(struct sock *sk, long timeout) * advertise a zero window, then kill -9 the FTP client, wheee... * Note: timeout is always zero in such a case. */ - if (data_was_unread) { + if (unlikely(tcp_sk(sk)->repair)) { + sk->sk_prot->disconnect(sk, 0); + } else if (data_was_unread) { /* Unread data was tossed, zap the connection. */ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); @@ -2074,6 +2223,8 @@ int tcp_disconnect(struct sock *sk, int flags) /* ABORT function of RFC793 */ if (old_state == TCP_LISTEN) { inet_csk_listen_stop(sk); + } else if (unlikely(tp->repair)) { + sk->sk_err = ECONNABORTED; } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -2125,6 +2276,74 @@ int tcp_disconnect(struct sock *sk, int flags) } EXPORT_SYMBOL(tcp_disconnect); +static inline int tcp_can_repair_sock(struct sock *sk) +{ + return capable(CAP_NET_ADMIN) && + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); +} + +static int tcp_repair_options_est(struct tcp_sock *tp, char __user *optbuf, unsigned int len) +{ + /* + * Options are stored in CODE:VALUE form where CODE is 8bit and VALUE + * fits the respective TCPOLEN_ size + */ + + while (len > 0) { + u8 opcode; + + if (get_user(opcode, optbuf)) + return -EFAULT; + + optbuf++; + len--; + + switch (opcode) { + case TCPOPT_MSS: { + u16 in_mss; + + if (len < sizeof(in_mss)) + return -ENODATA; + if (get_user(in_mss, optbuf)) + return -EFAULT; + + tp->rx_opt.mss_clamp = in_mss; + + optbuf += sizeof(in_mss); + len -= sizeof(in_mss); + break; + } + case TCPOPT_WINDOW: { + u8 wscale; + + if (len < sizeof(wscale)) + return -ENODATA; + if (get_user(wscale, optbuf)) + return -EFAULT; + + if (wscale > 14) + return -EFBIG; + + tp->rx_opt.snd_wscale = wscale; + + optbuf += sizeof(wscale); + len -= sizeof(wscale); + break; + } + case TCPOPT_SACK_PERM: + tp->rx_opt.sack_ok |= TCP_SACK_SEEN; + if (sysctl_tcp_fack) + tcp_enable_fack(tp); + break; + case TCPOPT_TIMESTAMP: + tp->rx_opt.tstamp_ok = 1; + break; + } + } + + return 0; +} + /* * Socket option code for TCP. */ @@ -2297,6 +2516,51 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->thin_dupack = val; break; + case TCP_REPAIR: + if (!tcp_can_repair_sock(sk)) + err = -EPERM; + else if (val == 1) { + tp->repair = 1; + sk->sk_reuse = SK_FORCE_REUSE; + tp->repair_queue = TCP_NO_QUEUE; + } else if (val == 0) { + tp->repair = 0; + sk->sk_reuse = SK_NO_REUSE; + tcp_send_window_probe(sk); + } else + err = -EINVAL; + + break; + + case TCP_REPAIR_QUEUE: + if (!tp->repair) + err = -EPERM; + else if (val < TCP_QUEUES_NR) + tp->repair_queue = val; + else + err = -EINVAL; + break; + + case TCP_QUEUE_SEQ: + if (sk->sk_state != TCP_CLOSE) + err = -EPERM; + else if (tp->repair_queue == TCP_SEND_QUEUE) + tp->write_seq = val; + else if (tp->repair_queue == TCP_RECV_QUEUE) + tp->rcv_nxt = val; + else + err = -EINVAL; + break; + + case TCP_REPAIR_OPTIONS: + if (!tp->repair) + err = -EINVAL; + else if (sk->sk_state == TCP_ESTABLISHED) + err = tcp_repair_options_est(tp, optval, optlen); + else + err = -EPERM; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit @@ -2530,6 +2794,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->mss_cache; if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) val = tp->rx_opt.user_mss; + if (tp->repair) + val = tp->rx_opt.mss_clamp; break; case TCP_NODELAY: val = !!(tp->nonagle&TCP_NAGLE_OFF); @@ -2632,6 +2898,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->thin_dupack; break; + case TCP_REPAIR: + val = tp->repair; + break; + + case TCP_REPAIR_QUEUE: + if (tp->repair) + val = tp->repair_queue; + else + return -EINVAL; + break; + + case TCP_QUEUE_SEQ: + if (tp->repair_queue == TCP_SEND_QUEUE) + val = tp->write_seq; + else if (tp->repair_queue == TCP_RECV_QUEUE) + val = tp->rcv_nxt; + else + return -EINVAL; + break; + case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; @@ -2675,7 +2961,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; - unsigned thlen; + unsigned int thlen; unsigned int seq; __be32 delta; unsigned int oldlen; @@ -3033,9 +3319,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); struct hash_desc *desc = &hp->md5_desc; - unsigned i; - const unsigned head_data_len = skb_headlen(skb) > header_len ? - skb_headlen(skb) - header_len : 0; + unsigned int i; + const unsigned int head_data_len = skb_headlen(skb) > header_len ? + skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); struct sk_buff *frag_iter; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ff364065376..c1c611b385a7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -175,7 +175,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) static void tcp_incr_quickack(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); + unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks == 0) quickacks = 2; @@ -937,7 +937,7 @@ static void tcp_init_metrics(struct sock *sk) tcp_set_rto(sk); reset: if (tp->srtt == 0) { - /* RFC2988bis: We've failed to get a valid RTT sample from + /* RFC6298: 5.7 We've failed to get a valid RTT sample from * 3WHS. This is most likely due to retransmission, * including spurious one. Reset the RTO back to 3secs * from the more aggressive 1sec to avoid more spurious @@ -947,7 +947,7 @@ reset: inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been - * retransmitted. In light of RFC2988bis' more aggressive 1sec + * retransmitted. In light of RFC6298 more aggressive 1sec * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK * retransmission has occurred. */ @@ -4450,6 +4450,58 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) return 0; } +/** + * tcp_try_coalesce - try to merge skb to prior one + * @sk: socket + * @to: prior buffer + * @from: buffer to add in queue + * + * Before queueing skb @from after @to, try to merge them + * to reduce overall memory use and queue lengths, if cost is small. + * Packets in ofo or receive queues can stay a long time. + * Better try to coalesce them right now to avoid future collapses. + * Returns > 0 value if caller should free @from instead of queueing it + */ +static int tcp_try_coalesce(struct sock *sk, + struct sk_buff *to, + struct sk_buff *from) +{ + int len = from->len; + + if (tcp_hdr(from)->fin) + return 0; + if (len <= skb_tailroom(to)) { + BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); +merge: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); + TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; + TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; + return 1; + } + if (skb_headlen(from) == 0 && + !skb_has_frag_list(to) && + !skb_has_frag_list(from) && + (skb_shinfo(to)->nr_frags + + skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) { + int delta = from->truesize - ksize(from->head) - + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + + WARN_ON_ONCE(delta < len); + memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, + skb_shinfo(from)->frags, + skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); + skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; + skb_shinfo(from)->nr_frags = 0; + to->truesize += delta; + atomic_add(delta, &sk->sk_rmem_alloc); + sk_mem_charge(sk, delta); + to->len += len; + to->data_len += len; + goto merge; + } + return 0; +} + static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -4488,23 +4540,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq = TCP_SKB_CB(skb)->end_seq; if (seq == TCP_SKB_CB(skb1)->end_seq) { - /* Packets in ofo can stay in queue a long time. - * Better try to coalesce them right now - * to avoid future tcp_collapse_ofo_queue(), - * probably the most expensive function in tcp stack. - */ - if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPRCVCOALESCE); - BUG_ON(skb_copy_bits(skb, 0, - skb_put(skb1, skb->len), - skb->len)); - TCP_SKB_CB(skb1)->end_seq = end_seq; - TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + if (tcp_try_coalesce(sk, skb1, skb) <= 0) { + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + } else { __kfree_skb(skb); skb = NULL; - } else { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); } if (!tp->rx_opt.num_sacks || @@ -4625,13 +4665,18 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) } if (eaten <= 0) { + struct sk_buff *tail; queue_and_out: if (eaten < 0 && tcp_try_rmem_schedule(sk, skb->truesize)) goto drop; - skb_set_owner_r(skb, sk); - __skb_queue_tail(&sk->sk_receive_queue, skb); + tail = skb_peek_tail(&sk->sk_receive_queue); + eaten = tail ? tcp_try_coalesce(sk, tail, skb) : -1; + if (eaten <= 0) { + skb_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); + } } tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (skb->len) @@ -5326,6 +5371,14 @@ discard: return 0; } +void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen) +{ + __skb_pull(skb, hdrlen); + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; +} + /* * TCP receive function for the ESTABLISHED state. * @@ -5491,10 +5544,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ - __skb_pull(skb, tcp_header_len); - __skb_queue_tail(&sk->sk_receive_queue, skb); - skb_set_owner_r(skb, sk); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_queue_rcv(sk, skb, tcp_header_len); } tcp_event_data_recv(sk, skb); @@ -5560,6 +5610,44 @@ discard: } EXPORT_SYMBOL(tcp_rcv_established); +void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_set_state(sk, TCP_ESTABLISHED); + + if (skb != NULL) + security_inet_conn_established(sk, skb); + + /* Make sure socket is routed, for correct metrics. */ + icsk->icsk_af_ops->rebuild_header(sk); + + tcp_init_metrics(sk); + + tcp_init_congestion_control(sk); + + /* Prevent spurious tcp_cwnd_restart() on first data + * packet. + */ + tp->lsndtime = tcp_time_stamp; + + tcp_init_buffer_space(sk); + + if (sock_flag(sk, SOCK_KEEPOPEN)) + inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); + + if (!tp->rx_opt.snd_wscale) + __tcp_fast_path_on(tp, tp->snd_wnd); + else + tp->pred_flags = 0; + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + } +} + static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) { @@ -5692,36 +5780,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, } smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - - security_inet_conn_established(sk, skb); - - /* Make sure socket is routed, for correct metrics. */ - icsk->icsk_af_ops->rebuild_header(sk); - - tcp_init_metrics(sk); - - tcp_init_congestion_control(sk); - - /* Prevent spurious tcp_cwnd_restart() on first data - * packet. - */ - tp->lsndtime = tcp_time_stamp; - tcp_init_buffer_space(sk); - - if (sock_flag(sk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); - - if (!tp->rx_opt.snd_wscale) - __tcp_fast_path_on(tp, tp->snd_wnd); - else - tp->pred_flags = 0; - - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - } + tcp_finish_connect(sk, skb); if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || @@ -5735,8 +5795,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, */ inet_csk_schedule_ack(sk); icsk->icsk_ack.lrcvtime = tcp_time_stamp; - icsk->icsk_ack.ato = TCP_ATO_MIN; - tcp_incr_quickack(sk); tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0cb86ceb652f..cf97e9821d76 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) } EXPORT_SYMBOL_GPL(tcp_twsk_unique); +static int tcp_repair_connect(struct sock *sk) +{ + tcp_connect_init(sk); + tcp_finish_connect(sk, NULL); + + return 0; +} + /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Reset inherited state */ tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; - tp->write_seq = 0; + if (likely(!tp->repair)) + tp->write_seq = 0; } if (tcp_death_row.sysctl_tw_recycle && @@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); - if (!tp->write_seq) + if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, inet->inet_daddr, inet->inet_sport, @@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_id = tp->write_seq ^ jiffies; - err = tcp_connect(sk); + if (likely(!tp->repair)) + err = tcp_connect(sk); + else + err = tcp_repair_connect(sk); + rt = NULL; if (err) goto failure; @@ -1739,7 +1752,8 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (unlikely(sk_add_backlog(sk, skb))) { + } else if (unlikely(sk_add_backlog(sk, skb, + sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; @@ -1875,64 +1889,15 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { static int tcp_v4_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - - skb_queue_head_init(&tp->out_of_order_queue); - tcp_init_xmit_timers(sk); - tcp_prequeue_init(tp); - icsk->icsk_rto = TCP_TIMEOUT_INIT; - tp->mdev = TCP_TIMEOUT_INIT; - - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - tp->snd_cwnd = TCP_INIT_CWND; - - /* See draft-stevens-tcpca-spec-01 for discussion of the - * initialization of these values. - */ - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - tp->snd_cwnd_clamp = ~0; - tp->mss_cache = TCP_MSS_DEFAULT; - - tp->reordering = sysctl_tcp_reordering; - icsk->icsk_ca_ops = &tcp_init_congestion_ops; - - sk->sk_state = TCP_CLOSE; - - sk->sk_write_space = sk_stream_write_space; - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + tcp_init_sock(sk); icsk->icsk_af_ops = &ipv4_specific; - icsk->icsk_sync_mss = tcp_sync_mss; + #ifdef CONFIG_TCP_MD5SIG - tp->af_specific = &tcp_sock_ipv4_specific; + tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; #endif - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } - /* Presumed zeroed, in order of appearance: - * cookie_in_always, cookie_out_never, - * s_data_constant, s_data_in, s_data_out - */ - sk->sk_sndbuf = sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - - local_bh_disable(); - sock_update_memcg(sk); - sk_sockets_allocated_inc(sk); - local_bh_enable(); - return 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7ac6423117ad..7b7cf3811348 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -563,13 +563,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, /* Compute TCP options for SYN packets. This is not the final * network wire format yet. */ -static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; - unsigned remaining = MAX_TCP_OPTION_SPACE; + unsigned int remaining = MAX_TCP_OPTION_SPACE; u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? tcp_cookie_size_check(cvp->cookie_desired) : 0; @@ -663,15 +663,15 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } /* Set up TCP options for SYN-ACKs. */ -static unsigned tcp_synack_options(struct sock *sk, +static unsigned int tcp_synack_options(struct sock *sk, struct request_sock *req, - unsigned mss, struct sk_buff *skb, + unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, struct tcp_extend_values *xvp) { struct inet_request_sock *ireq = inet_rsk(req); - unsigned remaining = MAX_TCP_OPTION_SPACE; + unsigned int remaining = MAX_TCP_OPTION_SPACE; u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? xvp->cookie_plus : 0; @@ -742,13 +742,13 @@ static unsigned tcp_synack_options(struct sock *sk, /* Compute TCP options for ESTABLISHED sockets. This is not the * final wire format yet. */ -static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; struct tcp_sock *tp = tcp_sk(sk); - unsigned size = 0; + unsigned int size = 0; unsigned int eff_sacks; #ifdef CONFIG_TCP_MD5SIG @@ -770,9 +770,9 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; if (unlikely(eff_sacks)) { - const unsigned remaining = MAX_TCP_OPTION_SPACE - size; + const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; opts->num_sack_blocks = - min_t(unsigned, eff_sacks, + min_t(unsigned int, eff_sacks, (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK); size += TCPOLEN_SACK_BASE_ALIGNED + @@ -801,7 +801,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct tcp_sock *tp; struct tcp_skb_cb *tcb; struct tcp_out_options opts; - unsigned tcp_options_size, tcp_header_size; + unsigned int tcp_options_size, tcp_header_size; struct tcp_md5sig_key *md5; struct tcphdr *th; int err; @@ -1259,7 +1259,7 @@ unsigned int tcp_current_mss(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); const struct dst_entry *dst = __sk_dst_get(sk); u32 mss_now; - unsigned header_len; + unsigned int header_len; struct tcp_out_options opts; struct tcp_md5sig_key *md5; @@ -1390,7 +1390,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp) */ static inline int tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb, - unsigned mss_now, int nonagle) + unsigned int mss_now, int nonagle) { return skb->len < mss_now && ((nonagle & TCP_NAGLE_CORK) || @@ -2562,7 +2562,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ -static void tcp_connect_init(struct sock *sk) +void tcp_connect_init(struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2617,9 +2617,12 @@ static void tcp_connect_init(struct sock *sk) tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; - tp->rcv_nxt = 0; - tp->rcv_wup = 0; - tp->copied_seq = 0; + tp->snd_nxt = tp->write_seq; + + if (likely(!tp->repair)) + tp->rcv_nxt = 0; + tp->rcv_wup = tp->rcv_nxt; + tp->copied_seq = tp->rcv_nxt; inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; inet_csk(sk)->icsk_retransmits = 0; @@ -2642,7 +2645,6 @@ int tcp_connect(struct sock *sk) /* Reserve space for headers. */ skb_reserve(buff, MAX_TCP_HEADER); - tp->snd_nxt = tp->write_seq; tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); TCP_ECN_send_syn(sk, buff); @@ -2791,6 +2793,15 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } +void tcp_send_window_probe(struct sock *sk) +{ + if (sk->sk_state == TCP_ESTABLISHED) { + tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; + tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; + tcp_xmit_probe_skb(sk, 0); + } +} + /* Initiate keepalive or window probe from timer. */ int tcp_write_wakeup(struct sock *sk) { diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index a981cdc0a6e9..4526fe68e60e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -91,7 +91,7 @@ static inline int tcp_probe_avail(void) * Note: arguments must match tcp_rcv_established()! */ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + struct tcphdr *th, unsigned int len) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); @@ -138,7 +138,7 @@ static struct jprobe tcp_jprobe = { .entry = jtcp_rcv_established, }; -static int tcpprobe_open(struct inode * inode, struct file * file) +static int tcpprobe_open(struct inode *inode, struct file *file) { /* Reset (empty) log */ spin_lock_bh(&tcp_probe.lock); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fe141052a1be..279fd0846302 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -107,6 +107,7 @@ #include <net/checksum.h> #include <net/xfrm.h> #include <trace/events/udp.h> +#include <linux/static_key.h> #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -206,7 +207,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (!snum) { int low, high, remaining; - unsigned rand; + unsigned int rand; unsigned short first, last; DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); @@ -846,7 +847,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * Get and verify the address. */ if (msg->msg_name) { - struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; + struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) { @@ -1379,6 +1380,14 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } +static struct static_key udp_encap_needed __read_mostly; +void udp_encap_enable(void) +{ + if (!static_key_enabled(&udp_encap_needed)) + static_key_slow_inc(&udp_encap_needed); +} +EXPORT_SYMBOL(udp_encap_enable); + /* returns: * -1: error * 0: success @@ -1400,7 +1409,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; nf_reset(skb); - if (up->encap_type) { + if (static_key_false(&udp_encap_needed) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* @@ -1470,7 +1479,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; - if (sk_rcvqueues_full(sk, skb)) + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) goto drop; rc = 0; @@ -1479,7 +1488,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb)) { + else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { bh_unlock_sock(sk); goto drop; } @@ -1760,6 +1769,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, /* FALLTHROUGH */ case UDP_ENCAP_L2TPINUDP: up->encap_type = val; + udp_encap_enable(); break; default: err = -ENOPROTOOPT; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index aaad650d47d9..5a681e298b90 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -25,7 +25,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); extern int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); +extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); extern void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da8d43..0d3426cb5c4f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -152,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_AH: if (pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr = (__be32*)xprth; + __be32 *ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; } @@ -298,8 +298,8 @@ void __init xfrm4_init(int rt_max_size) xfrm4_state_init(); xfrm4_policy_init(); #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, - xfrm4_policy_table); + sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", + xfrm4_policy_table); #endif } |