summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/arp.c4
-rw-r--r--net/ipv4/devinet.c61
-rw-r--r--net/ipv4/fib_frontend.c10
-rw-r--r--net/ipv4/fib_rules.c16
-rw-r--r--net/ipv4/fib_semantics.c47
-rw-r--r--net/ipv4/igmp.c18
-rw-r--r--net/ipv4/inet_connection_sock.c29
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_forward.c4
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_gre.c102
-rw-r--r--net/ipv4/ip_options.c26
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipip.c57
-rw-r--r--net/ipv4/ipmr.c9
-rw-r--r--net/ipv4/netfilter.c12
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ip_queue.c6
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c2
-rw-r--r--net/ipv4/ping.c10
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c103
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c304
-rw-r--r--net/ipv4/tcp_input.c168
-rw-r--r--net/ipv4/tcp_ipv4.c77
-rw-r--r--net/ipv4/tcp_output.c45
-rw-r--r--net/ipv4/tcp_probe.c4
-rw-r--r--net/ipv4/udp.c20
-rw-r--r--net/ipv4/udp_impl.h2
-rw-r--r--net/ipv4/xfrm4_policy.c6
38 files changed, 767 insertions, 440 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 10e3751466b5..c8f7aee587d1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -350,7 +350,7 @@ lookup_protocol:
err = 0;
sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
- sk->sk_reuse = 1;
+ sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
@@ -541,7 +541,7 @@ out:
}
EXPORT_SYMBOL(inet_bind);
-int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index fd508b526014..3a280756dd73 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -77,7 +77,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
{
- unsigned char * optptr = (unsigned char*)(iph+1);
+ unsigned char *optptr = (unsigned char *)(iph+1);
int l = iph->ihl*4 - sizeof(struct iphdr);
int optlen;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 18d9b81ecb1a..373b56bf8f49 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1059,7 +1059,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
if (!IS_ERR(neigh)) {
- unsigned state = NUD_STALE;
+ unsigned int state = NUD_STALE;
if (r->arp_flags & ATF_PERM)
state = NUD_PERMANENT;
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
@@ -1071,7 +1071,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
return err;
}
-static unsigned arp_state_to_flags(struct neighbour *neigh)
+static unsigned int arp_state_to_flags(struct neighbour *neigh)
{
if (neigh->nud_state&NUD_PERMANENT)
return ATF_PERM | ATF_COM;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6e447ff94dfa..88c9e3f68c78 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1125,7 +1125,7 @@ skip:
}
}
-static inline bool inetdev_valid_mtu(unsigned mtu)
+static inline bool inetdev_valid_mtu(unsigned int mtu)
{
return mtu >= 68;
}
@@ -1266,17 +1266,15 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
- if (ifa->ifa_address)
- NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
-
- if (ifa->ifa_local)
- NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
-
- if (ifa->ifa_broadcast)
- NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
-
- if (ifa->ifa_label[0])
- NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
+ if ((ifa->ifa_address &&
+ nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
+ (ifa->ifa_local &&
+ nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
+ (ifa->ifa_broadcast &&
+ nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
+ (ifa->ifa_label[0] &&
+ nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
+ goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -1587,7 +1585,6 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
- char *dev_name;
} devinet_sysctl = {
.devinet_vars = {
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -1629,16 +1626,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
{
int i;
struct devinet_sysctl_table *t;
-
-#define DEVINET_CTL_PATH_DEV 3
-
- struct ctl_path devinet_ctl_path[] = {
- { .procname = "net", },
- { .procname = "ipv4", },
- { .procname = "conf", },
- { /* to be set */ },
- { },
- };
+ char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
if (!t)
@@ -1650,27 +1638,15 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
t->devinet_vars[i].extra2 = net;
}
- /*
- * Make a copy of dev_name, because '.procname' is regarded as const
- * by sysctl and we wouldn't want anyone to change it under our feet
- * (see SIOCSIFNAME).
- */
- t->dev_name = kstrdup(dev_name, GFP_KERNEL);
- if (!t->dev_name)
- goto free;
-
- devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
+ snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
- t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
- t->devinet_vars);
+ t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
if (!t->sysctl_header)
- goto free_procname;
+ goto free;
p->sysctl = t;
return 0;
-free_procname:
- kfree(t->dev_name);
free:
kfree(t);
out:
@@ -1686,7 +1662,6 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
cnf->sysctl = NULL;
unregister_net_sysctl_table(t->sysctl_header);
- kfree(t->dev_name);
kfree(t);
}
@@ -1716,12 +1691,6 @@ static struct ctl_table ctl_forward_entry[] = {
},
{ },
};
-
-static __net_initdata struct ctl_path net_ipv4_path[] = {
- { .procname = "net", },
- { .procname = "ipv4", },
- { },
-};
#endif
static __net_init int devinet_init_net(struct net *net)
@@ -1767,7 +1736,7 @@ static __net_init int devinet_init_net(struct net *net)
goto err_reg_dflt;
err = -ENOMEM;
- forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
+ forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
if (forw_hdr == NULL)
goto err_reg_ctl;
net->ipv4.forw_hdr = forw_hdr;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cbe3a68507cf..3854411fa37c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -136,13 +136,13 @@ static void fib_flush(struct net *net)
* Find address type as if only "dev" was present in the system. If
* on_dev is NULL then all interfaces are taken into consideration.
*/
-static inline unsigned __inet_dev_addr_type(struct net *net,
- const struct net_device *dev,
- __be32 addr)
+static inline unsigned int __inet_dev_addr_type(struct net *net,
+ const struct net_device *dev,
+ __be32 addr)
{
struct flowi4 fl4 = { .daddr = addr };
struct fib_result res;
- unsigned ret = RTN_BROADCAST;
+ unsigned int ret = RTN_BROADCAST;
struct fib_table *local_table;
if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
@@ -740,7 +740,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
#define BRD_OK 2
#define BRD0_OK 4
#define BRD1_OK 8
- unsigned ok = 0;
+ unsigned int ok = 0;
int subnet = 0; /* Primary network */
int gone = 1; /* Address is missing */
int same_prefsrc = 0; /* Another primary with same IP */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 799fc790b3cf..2d043f71ef70 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -221,15 +221,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->src_len = rule4->src_len;
frh->tos = rule4->tos;
- if (rule4->dst_len)
- NLA_PUT_BE32(skb, FRA_DST, rule4->dst);
-
- if (rule4->src_len)
- NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
-
+ if ((rule4->dst_len &&
+ nla_put_be32(skb, FRA_DST, rule4->dst)) ||
+ (rule4->src_len &&
+ nla_put_be32(skb, FRA_SRC, rule4->src)))
+ goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (rule4->tclassid)
- NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
+ if (rule4->tclassid &&
+ nla_put_u32(skb, FRA_FLOW, rule4->tclassid))
+ goto nla_put_failure;
#endif
return 0;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5063fa38ac7b..a8bdf7405433 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -931,33 +931,36 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
rtm->rtm_table = tb_id;
else
rtm->rtm_table = RT_TABLE_COMPAT;
- NLA_PUT_U32(skb, RTA_TABLE, tb_id);
+ if (nla_put_u32(skb, RTA_TABLE, tb_id))
+ goto nla_put_failure;
rtm->rtm_type = type;
rtm->rtm_flags = fi->fib_flags;
rtm->rtm_scope = fi->fib_scope;
rtm->rtm_protocol = fi->fib_protocol;
- if (rtm->rtm_dst_len)
- NLA_PUT_BE32(skb, RTA_DST, dst);
-
- if (fi->fib_priority)
- NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
-
+ if (rtm->rtm_dst_len &&
+ nla_put_be32(skb, RTA_DST, dst))
+ goto nla_put_failure;
+ if (fi->fib_priority &&
+ nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
+ goto nla_put_failure;
if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
goto nla_put_failure;
- if (fi->fib_prefsrc)
- NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
-
+ if (fi->fib_prefsrc &&
+ nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc))
+ goto nla_put_failure;
if (fi->fib_nhs == 1) {
- if (fi->fib_nh->nh_gw)
- NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
-
- if (fi->fib_nh->nh_oif)
- NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
+ if (fi->fib_nh->nh_gw &&
+ nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
+ goto nla_put_failure;
+ if (fi->fib_nh->nh_oif &&
+ nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
+ goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (fi->fib_nh[0].nh_tclassid)
- NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
+ if (fi->fib_nh[0].nh_tclassid &&
+ nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
+ goto nla_put_failure;
#endif
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -978,11 +981,13 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
rtnh->rtnh_hops = nh->nh_weight - 1;
rtnh->rtnh_ifindex = nh->nh_oif;
- if (nh->nh_gw)
- NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
+ if (nh->nh_gw &&
+ nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw))
+ goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (nh->nh_tclassid)
- NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
+ if (nh->nh_tclassid &&
+ nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+ goto nla_put_failure;
#endif
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5dfecfd7d5e9..6699f23e6f55 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -344,10 +344,10 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(pip, &rt->dst, NULL);
- ((u8*)&pip[1])[0] = IPOPT_RA;
- ((u8*)&pip[1])[1] = 4;
- ((u8*)&pip[1])[2] = 0;
- ((u8*)&pip[1])[3] = 0;
+ ((u8 *)&pip[1])[0] = IPOPT_RA;
+ ((u8 *)&pip[1])[1] = 4;
+ ((u8 *)&pip[1])[2] = 0;
+ ((u8 *)&pip[1])[3] = 0;
skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
skb_put(skb, sizeof(*pig));
@@ -688,10 +688,10 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
ip_select_ident(iph, &rt->dst, NULL);
- ((u8*)&iph[1])[0] = IPOPT_RA;
- ((u8*)&iph[1])[1] = 4;
- ((u8*)&iph[1])[2] = 0;
- ((u8*)&iph[1])[3] = 0;
+ ((u8 *)&iph[1])[0] = IPOPT_RA;
+ ((u8 *)&iph[1])[1] = 4;
+ ((u8 *)&iph[1])[2] = 0;
+ ((u8 *)&iph[1])[3] = 0;
ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
ih->type = type;
@@ -774,7 +774,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
if (psf->sf_count[MCAST_INCLUDE] ||
pmc->sfcount[MCAST_EXCLUDE] !=
psf->sf_count[MCAST_EXCLUDE])
- continue;
+ break;
if (srcs[i] == psf->sf_inaddr) {
scount++;
break;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19d66cefd7d3..95e61596e605 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -42,7 +42,8 @@ EXPORT_SYMBOL(sysctl_local_reserved_ports);
void inet_get_local_port_range(int *low, int *high)
{
- unsigned seq;
+ unsigned int seq;
+
do {
seq = read_seqbegin(&sysctl_local_ports.lock);
@@ -53,7 +54,7 @@ void inet_get_local_port_range(int *low, int *high)
EXPORT_SYMBOL(inet_get_local_port_range);
int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb)
+ const struct inet_bind_bucket *tb, bool relax)
{
struct sock *sk2;
struct hlist_node *node;
@@ -79,6 +80,14 @@ int inet_csk_bind_conflict(const struct sock *sk,
sk2_rcv_saddr == sk_rcv_saddr(sk))
break;
}
+ if (!relax && reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+
+ if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
+ sk2_rcv_saddr == sk_rcv_saddr(sk))
+ break;
+ }
}
}
return node != NULL;
@@ -122,12 +131,13 @@ again:
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
smallest_rover = rover;
- if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
+ if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
+ !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
snum = smallest_rover;
goto tb_found;
}
}
- if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+ if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
snum = rover;
goto tb_found;
}
@@ -172,18 +182,22 @@ have_snum:
goto tb_not_found;
tb_found:
if (!hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse == SK_FORCE_REUSE)
+ goto success;
+
if (tb->fastreuse > 0 &&
sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
smallest_size == -1) {
goto success;
} else {
ret = 1;
- if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+ if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
smallest_size != -1 && --attempts >= 0) {
spin_unlock(&head->lock);
goto again;
}
+
goto fail_unlock;
}
}
@@ -514,7 +528,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
- * If synack was not acknowledged for 3 seconds, it means
+ * If synack was not acknowledged for 1 second, it means
* one of the following things: synack was lost, ack was lost,
* rtt is high or nobody planned to ack (i.e. synflood).
* When server is a bit loaded, queue is populated with old
@@ -555,8 +569,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
syn_ack_recalc(req, thresh, max_retries,
queue->rskq_defer_accept,
&expire, &resend);
- if (req->rsk_ops->syn_ack_timeout)
- req->rsk_ops->syn_ack_timeout(parent, req);
+ req->rsk_ops->syn_ack_timeout(parent, req);
if (!expire &&
(!resend ||
!req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 984ec656b03b..7880af970208 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -217,7 +217,7 @@ begin:
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
-struct sock * __inet_lookup_established(struct net *net,
+struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 89168c6351ff..543ef6225458 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -263,7 +263,7 @@ rescan:
void inet_twdr_hangman(unsigned long data)
{
struct inet_timewait_death_row *twdr;
- int unsigned need_timer;
+ unsigned int need_timer;
twdr = (struct inet_timewait_death_row *)data;
spin_lock(&twdr->death_lock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 29a07b6c7168..e5c44fc586ab 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -41,7 +41,7 @@
static int ip_forward_finish(struct sk_buff *skb)
{
- struct ip_options * opt = &(IPCB(skb)->opt);
+ struct ip_options *opt = &(IPCB(skb)->opt);
IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
@@ -55,7 +55,7 @@ int ip_forward(struct sk_buff *skb)
{
struct iphdr *iph; /* Our header */
struct rtable *rt; /* Route we use */
- struct ip_options * opt = &(IPCB(skb)->opt);
+ struct ip_options *opt = &(IPCB(skb)->opt);
if (skb_warn_if_lro(skb))
goto drop;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3727e234c884..71e5c328176c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -569,7 +569,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
skb_morph(head, qp->q.fragments);
head->next = qp->q.fragments->next;
- kfree_skb(qp->q.fragments);
+ consume_skb(qp->q.fragments);
qp->q.fragments = head;
}
@@ -782,7 +782,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
table[2].data = &net->ipv4.frags.timeout;
}
- hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+ hdr = register_net_sysctl(net, "net/ipv4", table);
if (hdr == NULL)
goto err_reg;
@@ -807,7 +807,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
static void ip4_frags_ctl_register(void)
{
- register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+ register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
#else
static inline int ip4_frags_ns_ctl_register(struct net *net)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b57532d4742c..f49047b79609 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -169,37 +169,56 @@ struct ipgre_net {
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
- unsigned long rx_packets;
- unsigned long rx_bytes;
- unsigned long tx_packets;
- unsigned long tx_bytes;
-} __attribute__((aligned(4*sizeof(unsigned long))));
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+};
-static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
{
- struct pcpu_tstats sum = { 0 };
int i;
for_each_possible_cpu(i) {
const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-
- sum.rx_packets += tstats->rx_packets;
- sum.rx_bytes += tstats->rx_bytes;
- sum.tx_packets += tstats->tx_packets;
- sum.tx_bytes += tstats->tx_bytes;
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
}
- dev->stats.rx_packets = sum.rx_packets;
- dev->stats.rx_bytes = sum.rx_bytes;
- dev->stats.tx_packets = sum.tx_packets;
- dev->stats.tx_bytes = sum.tx_bytes;
- return &dev->stats;
+
+ tot->multicast = dev->stats.multicast;
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ return tot;
}
/* Given src, dst and key, find appropriate for input tunnel. */
-static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
- __be32 remote, __be32 local,
- __be32 key, __be16 gre_proto)
+static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
+ __be32 remote, __be32 local,
+ __be32 key, __be16 gre_proto)
{
struct net *net = dev_net(dev);
int link = dev->ifindex;
@@ -464,7 +483,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
*/
const struct iphdr *iph = (const struct iphdr *)skb->data;
- __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
+ __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
int grehlen = (iph->ihl<<2) + 4;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
@@ -574,7 +593,7 @@ static int ipgre_rcv(struct sk_buff *skb)
iph = ip_hdr(skb);
h = skb->data;
- flags = *(__be16*)h;
+ flags = *(__be16 *)h;
if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
/* - Version must be 0.
@@ -598,11 +617,11 @@ static int ipgre_rcv(struct sk_buff *skb)
offset += 4;
}
if (flags&GRE_KEY) {
- key = *(__be32*)(h + offset);
+ key = *(__be32 *)(h + offset);
offset += 4;
}
if (flags&GRE_SEQ) {
- seqno = ntohl(*(__be32*)(h + offset));
+ seqno = ntohl(*(__be32 *)(h + offset));
offset += 4;
}
}
@@ -672,8 +691,10 @@ static int ipgre_rcv(struct sk_buff *skb)
}
tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
__skb_tunnel_rx(skb, tunnel->dev);
@@ -900,7 +921,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
htons(ETH_P_TEB) : skb->protocol;
if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
- __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
+ __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
if (tunnel->parms.o_flags&GRE_SEQ) {
++tunnel->o_seqno;
@@ -913,7 +934,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
}
if (tunnel->parms.o_flags&GRE_CSUM) {
*ptr = 0;
- *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
+ *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
}
}
@@ -1169,7 +1190,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
{
struct ip_tunnel *t = netdev_priv(dev);
struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
- __be16 *p = (__be16*)(iph+1);
+ __be16 *p = (__be16 *)(iph+1);
memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
p[0] = t->parms.o_flags;
@@ -1253,7 +1274,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
.ndo_start_xmit = ipgre_tunnel_xmit,
.ndo_do_ioctl = ipgre_tunnel_ioctl,
.ndo_change_mtu = ipgre_tunnel_change_mtu,
- .ndo_get_stats = ipgre_get_stats,
+ .ndo_get_stats64 = ipgre_get_stats64,
};
static void ipgre_dev_free(struct net_device *dev)
@@ -1507,7 +1528,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ipgre_tunnel_change_mtu,
- .ndo_get_stats = ipgre_get_stats,
+ .ndo_get_stats64 = ipgre_get_stats64,
};
static void ipgre_tap_setup(struct net_device *dev)
@@ -1654,17 +1675,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm *p = &t->parms;
- NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
- NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
- NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
- NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
- NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
- NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
- NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
- NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
- NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
- NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
-
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
+ nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
+ nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
+ nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
+ nla_put_u8(skb, IFLA_GRE_PMTUDISC,
+ !!(p->iph.frag_off & htons(IP_DF))))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index a0d0d9d9b870..95722ed0e5bb 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -210,10 +210,10 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
* Simple and stupid 8), but the most efficient way.
*/
-void ip_options_fragment(struct sk_buff * skb)
+void ip_options_fragment(struct sk_buff *skb)
{
unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
- struct ip_options * opt = &(IPCB(skb)->opt);
+ struct ip_options *opt = &(IPCB(skb)->opt);
int l = opt->optlen;
int optlen;
@@ -248,13 +248,13 @@ void ip_options_fragment(struct sk_buff * skb)
*/
int ip_options_compile(struct net *net,
- struct ip_options * opt, struct sk_buff * skb)
+ struct ip_options *opt, struct sk_buff *skb)
{
int l;
- unsigned char * iph;
- unsigned char * optptr;
+ unsigned char *iph;
+ unsigned char *optptr;
int optlen;
- unsigned char * pp_ptr = NULL;
+ unsigned char *pp_ptr = NULL;
struct rtable *rt = NULL;
if (skb != NULL) {
@@ -413,7 +413,7 @@ int ip_options_compile(struct net *net,
opt->is_changed = 1;
}
} else {
- unsigned overflow = optptr[3]>>4;
+ unsigned int overflow = optptr[3]>>4;
if (overflow == 15) {
pp_ptr = optptr + 3;
goto error;
@@ -473,20 +473,20 @@ EXPORT_SYMBOL(ip_options_compile);
* Undo all the changes done by ip_options_compile().
*/
-void ip_options_undo(struct ip_options * opt)
+void ip_options_undo(struct ip_options *opt)
{
if (opt->srr) {
- unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr);
+ unsigned char *optptr = opt->__data+opt->srr-sizeof(struct iphdr);
memmove(optptr+7, optptr+3, optptr[1]-7);
memcpy(optptr+3, &opt->faddr, 4);
}
if (opt->rr_needaddr) {
- unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr);
+ unsigned char *optptr = opt->__data+opt->rr-sizeof(struct iphdr);
optptr[2] -= 4;
memset(&optptr[optptr[2]-1], 0, 4);
}
if (opt->ts) {
- unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr);
+ unsigned char *optptr = opt->__data+opt->ts-sizeof(struct iphdr);
if (opt->ts_needtime) {
optptr[2] -= 4;
memset(&optptr[optptr[2]-1], 0, 4);
@@ -549,8 +549,8 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp,
void ip_forward_options(struct sk_buff *skb)
{
- struct ip_options * opt = &(IPCB(skb)->opt);
- unsigned char * optptr;
+ struct ip_options *opt = &(IPCB(skb)->opt);
+ unsigned char *optptr;
struct rtable *rt = skb_rtable(skb);
unsigned char *raw = skb_network_header(skb);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2fd0fba77124..51c6c672c8aa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -90,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
- struct ip_options * opt = (struct ip_options *)optbuf;
+ struct ip_options *opt = (struct ip_options *)optbuf;
if (IPCB(skb)->opt.optlen == 0)
return;
@@ -147,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
{
struct inet_sock *inet = inet_sk(skb->sk);
- unsigned flags = inet->cmsg_flags;
+ unsigned int flags = inet->cmsg_flags;
/* Ordered by supposed usage frequency */
if (flags & 1)
@@ -1094,7 +1094,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
*/
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen, unsigned flags)
+ char __user *optval, int __user *optlen, unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
int val;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 92ac7e7363a0..f267280d8709 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1198,7 +1198,7 @@ static int __init ic_dynamic(void)
d = ic_first_dev;
retries = CONF_SEND_RETRIES;
get_random_bytes(&timeout, sizeof(timeout));
- timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
+ timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
for (;;) {
/* Track the device we are configuring */
ic_dev_xid = d->xid;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ae1413e3f2f8..2d0f99bf61b3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -144,33 +144,48 @@ static void ipip_dev_free(struct net_device *dev);
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
- unsigned long rx_packets;
- unsigned long rx_bytes;
- unsigned long tx_packets;
- unsigned long tx_bytes;
-} __attribute__((aligned(4*sizeof(unsigned long))));
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+};
-static struct net_device_stats *ipip_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
{
- struct pcpu_tstats sum = { 0 };
int i;
for_each_possible_cpu(i) {
const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-
- sum.rx_packets += tstats->rx_packets;
- sum.rx_bytes += tstats->rx_bytes;
- sum.tx_packets += tstats->tx_packets;
- sum.tx_bytes += tstats->tx_bytes;
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
}
- dev->stats.rx_packets = sum.rx_packets;
- dev->stats.rx_bytes = sum.rx_bytes;
- dev->stats.tx_packets = sum.tx_packets;
- dev->stats.tx_bytes = sum.tx_bytes;
- return &dev->stats;
+
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+ tot->collisions = dev->stats.collisions;
+
+ return tot;
}
-static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
+static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
__be32 remote, __be32 local)
{
unsigned int h0 = HASH(remote);
@@ -245,7 +260,7 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
rcu_assign_pointer(*tp, t);
}
-static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
+static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
struct ip_tunnel_parm *parms, int create)
{
__be32 remote = parms->iph.daddr;
@@ -404,8 +419,10 @@ static int ipip_rcv(struct sk_buff *skb)
skb->pkt_type = PACKET_HOST;
tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
__skb_tunnel_rx(skb, tunnel->dev);
@@ -730,7 +747,7 @@ static const struct net_device_ops ipip_netdev_ops = {
.ndo_start_xmit = ipip_tunnel_xmit,
.ndo_do_ioctl = ipip_tunnel_ioctl,
.ndo_change_mtu = ipip_tunnel_change_mtu,
- .ndo_get_stats = ipip_get_stats,
+ .ndo_get_stats64 = ipip_get_stats64,
};
static void ipip_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 960fbfc3e976..5bef604ac0fa 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2119,15 +2119,16 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
rtm->rtm_src_len = 32;
rtm->rtm_tos = 0;
rtm->rtm_table = mrt->id;
- NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
+ if (nla_put_u32(skb, RTA_TABLE, mrt->id))
+ goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = RTPROT_UNSPEC;
rtm->rtm_flags = 0;
- NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
- NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
-
+ if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) ||
+ nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp))
+ goto nla_put_failure;
if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
goto nla_put_failure;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4f47e064e262..ed1b36783192 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -12,7 +12,7 @@
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
+int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
{
struct net *net = dev_net(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
@@ -237,13 +237,3 @@ static void ipv4_netfilter_fini(void)
module_init(ipv4_netfilter_init);
module_exit(ipv4_netfilter_fini);
-
-#ifdef CONFIG_SYSCTL
-struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = {
- { .procname = "net", },
- { .procname = "ipv4", },
- { .procname = "netfilter", },
- { }
-};
-EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
-#endif /* CONFIG_SYSCTL */
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index fd7a3f68917f..a3935273869f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -303,7 +303,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
if (v < 0) {
/* Pop from stack? */
if (v != XT_RETURN) {
- verdict = (unsigned)(-v) - 1;
+ verdict = (unsigned int)(-v) - 1;
break;
}
e = back;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 94d45e1f8882..09775a1e1348 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -586,7 +586,7 @@ static int __init ip_queue_init(void)
#endif
register_netdevice_notifier(&ipq_dev_notifier);
#ifdef CONFIG_SYSCTL
- ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
+ ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table);
#endif
status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
if (status < 0) {
@@ -597,7 +597,7 @@ static int __init ip_queue_init(void)
cleanup_sysctl:
#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ipq_sysctl_header);
+ unregister_net_sysctl_table(ipq_sysctl_header);
#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
@@ -618,7 +618,7 @@ static void __exit ip_queue_fini(void)
ipq_flush(NULL, 0);
#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ipq_sysctl_header);
+ unregister_net_sysctl_table(ipq_sysctl_header);
#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 24e556e83a3b..585b80f3cc68 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -377,7 +377,7 @@ ipt_do_table(struct sk_buff *skb,
if (v < 0) {
/* Pop from stack? */
if (v != XT_RETURN) {
- verdict = (unsigned)(-v) - 1;
+ verdict = (unsigned int)(-v) - 1;
break;
}
if (*stackptr <= origptr) {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index cf73cc70ed2d..91747d4ebc26 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -311,8 +311,9 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
- NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
- NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
+ if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+ nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -364,7 +365,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.nla_policy = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path,
+ .ctl_table_path = "net/ipv4/netfilter",
.ctl_table = ip_ct_sysctl_table,
#endif
.me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7cbe9cb261c2..0847e373d33c 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -228,10 +228,10 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
static int icmp_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *t)
{
- NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id);
- NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type);
- NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code);
-
+ if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -293,8 +293,8 @@ icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
{
const unsigned int *timeout = data;
- NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ));
-
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 57932c43960e..ea4a23813d26 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -283,7 +283,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
__be32 newip;
u_int16_t port;
char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- unsigned buflen;
+ unsigned int buflen;
/* Connection will come from reply */
if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 50009c787bcd..6e930c7174dd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -51,15 +51,16 @@ static struct ping_table ping_table;
static u16 ping_port_rover;
-static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask)
+static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask)
{
int res = (num + net_hash_mix(net)) & mask;
+
pr_debug("hash(%d) = %d\n", num, res);
return res;
}
static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
- struct net *net, unsigned num)
+ struct net *net, unsigned int num)
{
return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
}
@@ -188,7 +189,8 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
gid_t *high)
{
gid_t *data = net->ipv4.sysctl_ping_group_range;
- unsigned seq;
+ unsigned int seq;
+
do {
seq = read_seqbegin(&sysctl_local_ports.lock);
@@ -410,7 +412,7 @@ struct pingfakehdr {
__wsum wcheck;
};
-static int ping_getfrag(void *from, char * to,
+static int ping_getfrag(void *from, char *to,
int offset, int fraglen, int odd, struct sk_buff *skb)
{
struct pingfakehdr *pfh = (struct pingfakehdr *)from;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bbd604c68e68..4032b818f3e4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -288,7 +288,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
read_unlock(&raw_v4_hashinfo.lock);
}
-static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
+static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
/* Charge it to the socket. */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 167ea10b521a..5773f5d9e213 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,6 +109,7 @@
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
+#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
@@ -229,7 +230,7 @@ const __u8 ip_tos2prio[16] = {
TC_PRIO_INTERACTIVE_BULK,
ECN_OR_COST(INTERACTIVE_BULK)
};
-
+EXPORT_SYMBOL(ip_tos2prio);
/*
* Route cache.
@@ -296,7 +297,7 @@ static inline void rt_hash_lock_init(void)
#endif
static struct rt_hash_bucket *rt_hash_table __read_mostly;
-static unsigned rt_hash_mask __read_mostly;
+static unsigned int rt_hash_mask __read_mostly;
static unsigned int rt_hash_log __read_mostly;
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
@@ -1143,7 +1144,7 @@ static int rt_bind_neighbour(struct rtable *rt)
return 0;
}
-static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
+static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt,
struct sk_buff *skb, int ifindex)
{
struct rtable *rth, *cand;
@@ -1384,7 +1385,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
}
EXPORT_SYMBOL(__ip_select_ident);
-static void rt_del(unsigned hash, struct rtable *rt)
+static void rt_del(unsigned int hash, struct rtable *rt)
{
struct rtable __rcu **rthp;
struct rtable *aux;
@@ -1538,7 +1539,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
ip_rt_put(rt);
ret = NULL;
} else if (rt->rt_flags & RTCF_REDIRECTED) {
- unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
+ unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
rt->rt_oif,
rt_genid(dev_net(dst->dev)));
rt_del(hash, rt);
@@ -2215,9 +2216,9 @@ static int ip_mkroute_input(struct sk_buff *skb,
struct in_device *in_dev,
__be32 daddr, __be32 saddr, u32 tos)
{
- struct rtable* rth = NULL;
+ struct rtable *rth = NULL;
int err;
- unsigned hash;
+ unsigned int hash;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1)
@@ -2255,13 +2256,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flowi4 fl4;
- unsigned flags = 0;
+ unsigned int flags = 0;
u32 itag = 0;
- struct rtable * rth;
- unsigned hash;
+ struct rtable *rth;
+ unsigned int hash;
__be32 spec_dst;
int err = -EINVAL;
- struct net * net = dev_net(dev);
+ struct net *net = dev_net(dev);
/* IP on this device is disabled. */
@@ -2433,8 +2434,8 @@ martian_source_keep_err:
int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev, bool noref)
{
- struct rtable * rth;
- unsigned hash;
+ struct rtable *rth;
+ unsigned int hash;
int iif = dev->ifindex;
struct net *net;
int res;
@@ -2972,7 +2973,8 @@ static int rt_fill_info(struct net *net,
r->rtm_src_len = 0;
r->rtm_tos = rt->rt_key_tos;
r->rtm_table = RT_TABLE_MAIN;
- NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
+ if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
+ goto nla_put_failure;
r->rtm_type = rt->rt_type;
r->rtm_scope = RT_SCOPE_UNIVERSE;
r->rtm_protocol = RTPROT_UNSPEC;
@@ -2980,31 +2982,38 @@ static int rt_fill_info(struct net *net,
if (rt->rt_flags & RTCF_NOTIFY)
r->rtm_flags |= RTM_F_NOTIFY;
- NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
-
+ if (nla_put_be32(skb, RTA_DST, rt->rt_dst))
+ goto nla_put_failure;
if (rt->rt_key_src) {
r->rtm_src_len = 32;
- NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
+ if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src))
+ goto nla_put_failure;
}
- if (rt->dst.dev)
- NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
+ if (rt->dst.dev &&
+ nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
+ goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (rt->dst.tclassid)
- NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
+ if (rt->dst.tclassid &&
+ nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
+ goto nla_put_failure;
#endif
- if (rt_is_input_route(rt))
- NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
- else if (rt->rt_src != rt->rt_key_src)
- NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
-
- if (rt->rt_dst != rt->rt_gateway)
- NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
+ if (rt_is_input_route(rt)) {
+ if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst))
+ goto nla_put_failure;
+ } else if (rt->rt_src != rt->rt_key_src) {
+ if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src))
+ goto nla_put_failure;
+ }
+ if (rt->rt_dst != rt->rt_gateway &&
+ nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
+ goto nla_put_failure;
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
- if (rt->rt_mark)
- NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
+ if (rt->rt_mark &&
+ nla_put_be32(skb, RTA_MARK, rt->rt_mark))
+ goto nla_put_failure;
error = rt->dst.error;
if (peer) {
@@ -3045,7 +3054,8 @@ static int rt_fill_info(struct net *net,
}
} else
#endif
- NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
+ if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+ goto nla_put_failure;
}
if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
@@ -3059,7 +3069,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
@@ -3334,23 +3344,6 @@ static ctl_table ipv4_route_table[] = {
{ }
};
-static struct ctl_table empty[1];
-
-static struct ctl_table ipv4_skeleton[] =
-{
- { .procname = "route",
- .mode = 0555, .child = ipv4_route_table},
- { .procname = "neigh",
- .mode = 0555, .child = empty},
- { }
-};
-
-static __net_initdata struct ctl_path ipv4_path[] = {
- { .procname = "net", },
- { .procname = "ipv4", },
- { },
-};
-
static struct ctl_table ipv4_route_flush_table[] = {
{
.procname = "flush",
@@ -3361,13 +3354,6 @@ static struct ctl_table ipv4_route_flush_table[] = {
{ },
};
-static __net_initdata struct ctl_path ipv4_route_path[] = {
- { .procname = "net", },
- { .procname = "ipv4", },
- { .procname = "route", },
- { },
-};
-
static __net_init int sysctl_route_net_init(struct net *net)
{
struct ctl_table *tbl;
@@ -3380,8 +3366,7 @@ static __net_init int sysctl_route_net_init(struct net *net)
}
tbl[0].extra1 = net;
- net->ipv4.route_hdr =
- register_net_sysctl_table(net, ipv4_route_path, tbl);
+ net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
if (net->ipv4.route_hdr == NULL)
goto err_reg;
return 0;
@@ -3505,6 +3490,6 @@ int __init ip_rt_init(void)
*/
void __init ip_static_sysctl_init(void)
{
- register_sysctl_paths(ipv4_path, ipv4_skeleton);
+ register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7a7724da9bff..33417f84e07f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -78,7 +78,7 @@ static int ipv4_local_port_range(ctl_table *table, int write,
static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
{
gid_t *data = table->data;
- unsigned seq;
+ unsigned int seq;
do {
seq = read_seqbegin(&sysctl_local_ports.lock);
@@ -768,13 +768,6 @@ static struct ctl_table ipv4_net_table[] = {
{ }
};
-struct ctl_path net_ipv4_ctl_path[] = {
- { .procname = "net", },
- { .procname = "ipv4", },
- { },
-};
-EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
-
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
struct ctl_table *table;
@@ -815,8 +808,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
tcp_init_mem(net);
- net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
- net_ipv4_ctl_path, table);
+ net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
if (net->ipv4.ipv4_hdr == NULL)
goto err_reg;
@@ -857,12 +849,12 @@ static __init int sysctl_ipv4_init(void)
if (!i->procname)
return -EINVAL;
- hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
+ hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
if (hdr == NULL)
return -ENOMEM;
if (register_pernet_subsys(&ipv4_sysctl_ops)) {
- unregister_sysctl_table(hdr);
+ unregister_net_sysctl_table(hdr);
return -ENOMEM;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8bb6adeb62c0..bcc4eab5f251 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -363,6 +363,70 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
return period;
}
+/* Address-family independent initialization for a tcp_sock.
+ *
+ * NOTE: A lot of things set to zero explicitly by call to
+ * sk_alloc() so need not be done here.
+ */
+void tcp_init_sock(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ skb_queue_head_init(&tp->out_of_order_queue);
+ tcp_init_xmit_timers(sk);
+ tcp_prequeue_init(tp);
+
+ icsk->icsk_rto = TCP_TIMEOUT_INIT;
+ tp->mdev = TCP_TIMEOUT_INIT;
+
+ /* So many TCP implementations out there (incorrectly) count the
+ * initial SYN frame in their delayed-ACK and congestion control
+ * algorithms that we must have the following bandaid to talk
+ * efficiently to them. -DaveM
+ */
+ tp->snd_cwnd = TCP_INIT_CWND;
+
+ /* See draft-stevens-tcpca-spec-01 for discussion of the
+ * initialization of these values.
+ */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+ tp->snd_cwnd_clamp = ~0;
+ tp->mss_cache = TCP_MSS_DEFAULT;
+
+ tp->reordering = sysctl_tcp_reordering;
+ icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+
+ sk->sk_state = TCP_CLOSE;
+
+ sk->sk_write_space = sk_stream_write_space;
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+ icsk->icsk_sync_mss = tcp_sync_mss;
+
+ /* TCP Cookie Transactions */
+ if (sysctl_tcp_cookie_size > 0) {
+ /* Default, cookies without s_data_payload. */
+ tp->cookie_values =
+ kzalloc(sizeof(*tp->cookie_values),
+ sk->sk_allocation);
+ if (tp->cookie_values != NULL)
+ kref_init(&tp->cookie_values->kref);
+ }
+ /* Presumed zeroed, in order of appearance:
+ * cookie_in_always, cookie_out_never,
+ * s_data_constant, s_data_in, s_data_out
+ */
+ sk->sk_sndbuf = sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+
+ local_bh_disable();
+ sock_update_memcg(sk);
+ sk_sockets_allocated_inc(sk);
+ local_bh_enable();
+}
+EXPORT_SYMBOL(tcp_init_sock);
+
/*
* Wait for a TCP event.
*
@@ -912,6 +976,39 @@ static inline int select_size(const struct sock *sk, bool sg)
return tmp;
}
+static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ struct sk_buff *skb;
+ struct tcp_skb_cb *cb;
+ struct tcphdr *th;
+
+ skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
+ if (!skb)
+ goto err;
+
+ th = (struct tcphdr *)skb_put(skb, sizeof(*th));
+ skb_reset_transport_header(skb);
+ memset(th, 0, sizeof(*th));
+
+ if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
+ goto err_free;
+
+ cb = TCP_SKB_CB(skb);
+
+ TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
+ TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
+
+ tcp_queue_rcv(sk, skb, sizeof(*th));
+
+ return size;
+
+err_free:
+ kfree_skb(skb);
+err:
+ return -ENOMEM;
+}
+
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size)
{
@@ -919,7 +1016,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int iovlen, flags, err, copied;
- int mss_now, size_goal;
+ int mss_now = 0, size_goal;
bool sg;
long timeo;
@@ -933,6 +1030,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
goto out_err;
+ if (unlikely(tp->repair)) {
+ if (tp->repair_queue == TCP_RECV_QUEUE) {
+ copied = tcp_send_rcvq(sk, msg, size);
+ goto out;
+ }
+
+ err = -EINVAL;
+ if (tp->repair_queue == TCP_NO_QUEUE)
+ goto out_err;
+
+ /* 'common' sending to sendq */
+ }
+
/* This should be in poll */
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
@@ -1089,7 +1199,7 @@ new_segment:
if ((seglen -= copy) == 0 && iovlen == 0)
goto out;
- if (skb->len < max || (flags & MSG_OOB))
+ if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
@@ -1102,7 +1212,7 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- if (copied)
+ if (copied && likely(!tp->repair))
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1113,7 +1223,7 @@ wait_for_memory:
}
out:
- if (copied)
+ if (copied && likely(!tp->repair))
tcp_push(sk, flags, mss_now, tp->nonagle);
release_sock(sk);
return copied;
@@ -1187,6 +1297,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
return -EAGAIN;
}
+static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
+{
+ struct sk_buff *skb;
+ int copied = 0, err = 0;
+
+ /* XXX -- need to support SO_PEEK_OFF */
+
+ skb_queue_walk(&sk->sk_write_queue, skb) {
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
+ if (err)
+ break;
+
+ copied += skb->len;
+ }
+
+ return err ?: copied;
+}
+
/* Clean up the receive buffer for full frames taken by the user,
* then send an ACK if necessary. COPIED is the number of bytes
* tcp_recvmsg has given to the user so far, it speeds up the
@@ -1432,6 +1560,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto recv_urg;
+ if (unlikely(tp->repair)) {
+ err = -EPERM;
+ if (!(flags & MSG_PEEK))
+ goto out;
+
+ if (tp->repair_queue == TCP_SEND_QUEUE)
+ goto recv_sndq;
+
+ err = -EINVAL;
+ if (tp->repair_queue == TCP_NO_QUEUE)
+ goto out;
+
+ /* 'common' recv queue MSG_PEEK-ing */
+ }
+
seq = &tp->copied_seq;
if (flags & MSG_PEEK) {
peek_seq = tp->copied_seq;
@@ -1783,6 +1926,10 @@ out:
recv_urg:
err = tcp_recv_urg(sk, msg, len, flags);
goto out;
+
+recv_sndq:
+ err = tcp_peek_sndq(sk, msg, len);
+ goto out;
}
EXPORT_SYMBOL(tcp_recvmsg);
@@ -1935,7 +2082,9 @@ void tcp_close(struct sock *sk, long timeout)
* advertise a zero window, then kill -9 the FTP client, wheee...
* Note: timeout is always zero in such a case.
*/
- if (data_was_unread) {
+ if (unlikely(tcp_sk(sk)->repair)) {
+ sk->sk_prot->disconnect(sk, 0);
+ } else if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
@@ -2074,6 +2223,8 @@ int tcp_disconnect(struct sock *sk, int flags)
/* ABORT function of RFC793 */
if (old_state == TCP_LISTEN) {
inet_csk_listen_stop(sk);
+ } else if (unlikely(tp->repair)) {
+ sk->sk_err = ECONNABORTED;
} else if (tcp_need_reset(old_state) ||
(tp->snd_nxt != tp->write_seq &&
(1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2125,6 +2276,74 @@ int tcp_disconnect(struct sock *sk, int flags)
}
EXPORT_SYMBOL(tcp_disconnect);
+static inline int tcp_can_repair_sock(struct sock *sk)
+{
+ return capable(CAP_NET_ADMIN) &&
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
+}
+
+static int tcp_repair_options_est(struct tcp_sock *tp, char __user *optbuf, unsigned int len)
+{
+ /*
+ * Options are stored in CODE:VALUE form where CODE is 8bit and VALUE
+ * fits the respective TCPOLEN_ size
+ */
+
+ while (len > 0) {
+ u8 opcode;
+
+ if (get_user(opcode, optbuf))
+ return -EFAULT;
+
+ optbuf++;
+ len--;
+
+ switch (opcode) {
+ case TCPOPT_MSS: {
+ u16 in_mss;
+
+ if (len < sizeof(in_mss))
+ return -ENODATA;
+ if (get_user(in_mss, optbuf))
+ return -EFAULT;
+
+ tp->rx_opt.mss_clamp = in_mss;
+
+ optbuf += sizeof(in_mss);
+ len -= sizeof(in_mss);
+ break;
+ }
+ case TCPOPT_WINDOW: {
+ u8 wscale;
+
+ if (len < sizeof(wscale))
+ return -ENODATA;
+ if (get_user(wscale, optbuf))
+ return -EFAULT;
+
+ if (wscale > 14)
+ return -EFBIG;
+
+ tp->rx_opt.snd_wscale = wscale;
+
+ optbuf += sizeof(wscale);
+ len -= sizeof(wscale);
+ break;
+ }
+ case TCPOPT_SACK_PERM:
+ tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
+ if (sysctl_tcp_fack)
+ tcp_enable_fack(tp);
+ break;
+ case TCPOPT_TIMESTAMP:
+ tp->rx_opt.tstamp_ok = 1;
+ break;
+ }
+ }
+
+ return 0;
+}
+
/*
* Socket option code for TCP.
*/
@@ -2297,6 +2516,51 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->thin_dupack = val;
break;
+ case TCP_REPAIR:
+ if (!tcp_can_repair_sock(sk))
+ err = -EPERM;
+ else if (val == 1) {
+ tp->repair = 1;
+ sk->sk_reuse = SK_FORCE_REUSE;
+ tp->repair_queue = TCP_NO_QUEUE;
+ } else if (val == 0) {
+ tp->repair = 0;
+ sk->sk_reuse = SK_NO_REUSE;
+ tcp_send_window_probe(sk);
+ } else
+ err = -EINVAL;
+
+ break;
+
+ case TCP_REPAIR_QUEUE:
+ if (!tp->repair)
+ err = -EPERM;
+ else if (val < TCP_QUEUES_NR)
+ tp->repair_queue = val;
+ else
+ err = -EINVAL;
+ break;
+
+ case TCP_QUEUE_SEQ:
+ if (sk->sk_state != TCP_CLOSE)
+ err = -EPERM;
+ else if (tp->repair_queue == TCP_SEND_QUEUE)
+ tp->write_seq = val;
+ else if (tp->repair_queue == TCP_RECV_QUEUE)
+ tp->rcv_nxt = val;
+ else
+ err = -EINVAL;
+ break;
+
+ case TCP_REPAIR_OPTIONS:
+ if (!tp->repair)
+ err = -EINVAL;
+ else if (sk->sk_state == TCP_ESTABLISHED)
+ err = tcp_repair_options_est(tp, optval, optlen);
+ else
+ err = -EPERM;
+ break;
+
case TCP_CORK:
/* When set indicates to always queue non-full frames.
* Later the user clears this option and we transmit
@@ -2530,6 +2794,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = tp->mss_cache;
if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
val = tp->rx_opt.user_mss;
+ if (tp->repair)
+ val = tp->rx_opt.mss_clamp;
break;
case TCP_NODELAY:
val = !!(tp->nonagle&TCP_NAGLE_OFF);
@@ -2632,6 +2898,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = tp->thin_dupack;
break;
+ case TCP_REPAIR:
+ val = tp->repair;
+ break;
+
+ case TCP_REPAIR_QUEUE:
+ if (tp->repair)
+ val = tp->repair_queue;
+ else
+ return -EINVAL;
+ break;
+
+ case TCP_QUEUE_SEQ:
+ if (tp->repair_queue == TCP_SEND_QUEUE)
+ val = tp->write_seq;
+ else if (tp->repair_queue == TCP_RECV_QUEUE)
+ val = tp->rcv_nxt;
+ else
+ return -EINVAL;
+ break;
+
case TCP_USER_TIMEOUT:
val = jiffies_to_msecs(icsk->icsk_user_timeout);
break;
@@ -2675,7 +2961,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct tcphdr *th;
- unsigned thlen;
+ unsigned int thlen;
unsigned int seq;
__be32 delta;
unsigned int oldlen;
@@ -3033,9 +3319,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
struct scatterlist sg;
const struct tcphdr *tp = tcp_hdr(skb);
struct hash_desc *desc = &hp->md5_desc;
- unsigned i;
- const unsigned head_data_len = skb_headlen(skb) > header_len ?
- skb_headlen(skb) - header_len : 0;
+ unsigned int i;
+ const unsigned int head_data_len = skb_headlen(skb) > header_len ?
+ skb_headlen(skb) - header_len : 0;
const struct skb_shared_info *shi = skb_shinfo(skb);
struct sk_buff *frag_iter;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ff364065376..c1c611b385a7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -175,7 +175,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
static void tcp_incr_quickack(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
+ unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
if (quickacks == 0)
quickacks = 2;
@@ -937,7 +937,7 @@ static void tcp_init_metrics(struct sock *sk)
tcp_set_rto(sk);
reset:
if (tp->srtt == 0) {
- /* RFC2988bis: We've failed to get a valid RTT sample from
+ /* RFC6298: 5.7 We've failed to get a valid RTT sample from
* 3WHS. This is most likely due to retransmission,
* including spurious one. Reset the RTO back to 3secs
* from the more aggressive 1sec to avoid more spurious
@@ -947,7 +947,7 @@ reset:
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
}
/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
- * retransmitted. In light of RFC2988bis' more aggressive 1sec
+ * retransmitted. In light of RFC6298 more aggressive 1sec
* initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
* retransmission has occurred.
*/
@@ -4450,6 +4450,58 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
return 0;
}
+/**
+ * tcp_try_coalesce - try to merge skb to prior one
+ * @sk: socket
+ * @to: prior buffer
+ * @from: buffer to add in queue
+ *
+ * Before queueing skb @from after @to, try to merge them
+ * to reduce overall memory use and queue lengths, if cost is small.
+ * Packets in ofo or receive queues can stay a long time.
+ * Better try to coalesce them right now to avoid future collapses.
+ * Returns > 0 value if caller should free @from instead of queueing it
+ */
+static int tcp_try_coalesce(struct sock *sk,
+ struct sk_buff *to,
+ struct sk_buff *from)
+{
+ int len = from->len;
+
+ if (tcp_hdr(from)->fin)
+ return 0;
+ if (len <= skb_tailroom(to)) {
+ BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
+merge:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
+ TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
+ TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
+ return 1;
+ }
+ if (skb_headlen(from) == 0 &&
+ !skb_has_frag_list(to) &&
+ !skb_has_frag_list(from) &&
+ (skb_shinfo(to)->nr_frags +
+ skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) {
+ int delta = from->truesize - ksize(from->head) -
+ SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+ WARN_ON_ONCE(delta < len);
+ memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
+ skb_shinfo(from)->frags,
+ skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
+ skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+ skb_shinfo(from)->nr_frags = 0;
+ to->truesize += delta;
+ atomic_add(delta, &sk->sk_rmem_alloc);
+ sk_mem_charge(sk, delta);
+ to->len += len;
+ to->data_len += len;
+ goto merge;
+ }
+ return 0;
+}
+
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4488,23 +4540,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
end_seq = TCP_SKB_CB(skb)->end_seq;
if (seq == TCP_SKB_CB(skb1)->end_seq) {
- /* Packets in ofo can stay in queue a long time.
- * Better try to coalesce them right now
- * to avoid future tcp_collapse_ofo_queue(),
- * probably the most expensive function in tcp stack.
- */
- if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPRCVCOALESCE);
- BUG_ON(skb_copy_bits(skb, 0,
- skb_put(skb1, skb->len),
- skb->len));
- TCP_SKB_CB(skb1)->end_seq = end_seq;
- TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+ if (tcp_try_coalesce(sk, skb1, skb) <= 0) {
+ __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+ } else {
__kfree_skb(skb);
skb = NULL;
- } else {
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
}
if (!tp->rx_opt.num_sacks ||
@@ -4625,13 +4665,18 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
}
if (eaten <= 0) {
+ struct sk_buff *tail;
queue_and_out:
if (eaten < 0 &&
tcp_try_rmem_schedule(sk, skb->truesize))
goto drop;
- skb_set_owner_r(skb, sk);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tail = skb_peek_tail(&sk->sk_receive_queue);
+ eaten = tail ? tcp_try_coalesce(sk, tail, skb) : -1;
+ if (eaten <= 0) {
+ skb_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ }
}
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (skb->len)
@@ -5326,6 +5371,14 @@ discard:
return 0;
}
+void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen)
+{
+ __skb_pull(skb, hdrlen);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+}
+
/*
* TCP receive function for the ESTABLISHED state.
*
@@ -5491,10 +5544,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
- __skb_pull(skb, tcp_header_len);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- skb_set_owner_r(skb, sk);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_queue_rcv(sk, skb, tcp_header_len);
}
tcp_event_data_recv(sk, skb);
@@ -5560,6 +5610,44 @@ discard:
}
EXPORT_SYMBOL(tcp_rcv_established);
+void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_set_state(sk, TCP_ESTABLISHED);
+
+ if (skb != NULL)
+ security_inet_conn_established(sk, skb);
+
+ /* Make sure socket is routed, for correct metrics. */
+ icsk->icsk_af_ops->rebuild_header(sk);
+
+ tcp_init_metrics(sk);
+
+ tcp_init_congestion_control(sk);
+
+ /* Prevent spurious tcp_cwnd_restart() on first data
+ * packet.
+ */
+ tp->lsndtime = tcp_time_stamp;
+
+ tcp_init_buffer_space(sk);
+
+ if (sock_flag(sk, SOCK_KEEPOPEN))
+ inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
+
+ if (!tp->rx_opt.snd_wscale)
+ __tcp_fast_path_on(tp, tp->snd_wnd);
+ else
+ tp->pred_flags = 0;
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ sk->sk_state_change(sk);
+ sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+ }
+}
+
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, unsigned int len)
{
@@ -5692,36 +5780,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
}
smp_mb();
- tcp_set_state(sk, TCP_ESTABLISHED);
-
- security_inet_conn_established(sk, skb);
-
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- tcp_init_metrics(sk);
-
- tcp_init_congestion_control(sk);
-
- /* Prevent spurious tcp_cwnd_restart() on first data
- * packet.
- */
- tp->lsndtime = tcp_time_stamp;
- tcp_init_buffer_space(sk);
-
- if (sock_flag(sk, SOCK_KEEPOPEN))
- inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
-
- if (!tp->rx_opt.snd_wscale)
- __tcp_fast_path_on(tp, tp->snd_wnd);
- else
- tp->pred_flags = 0;
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- sk->sk_state_change(sk);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- }
+ tcp_finish_connect(sk, skb);
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
@@ -5735,8 +5795,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*/
inet_csk_schedule_ack(sk);
icsk->icsk_ack.lrcvtime = tcp_time_stamp;
- icsk->icsk_ack.ato = TCP_ATO_MIN;
- tcp_incr_quickack(sk);
tcp_enter_quickack_mode(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0cb86ceb652f..cf97e9821d76 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
+static int tcp_repair_connect(struct sock *sk)
+{
+ tcp_connect_init(sk);
+ tcp_finish_connect(sk, NULL);
+
+ return 0;
+}
+
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
- tp->write_seq = 0;
+ if (likely(!tp->repair))
+ tp->write_seq = 0;
}
if (tcp_death_row.sysctl_tw_recycle &&
@@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst);
- if (!tp->write_seq)
+ if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
@@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_id = tp->write_seq ^ jiffies;
- err = tcp_connect(sk);
+ if (likely(!tp->repair))
+ err = tcp_connect(sk);
+ else
+ err = tcp_repair_connect(sk);
+
rt = NULL;
if (err)
goto failure;
@@ -1739,7 +1752,8 @@ process:
if (!tcp_prequeue(sk, skb))
ret = tcp_v4_do_rcv(sk, skb);
}
- } else if (unlikely(sk_add_backlog(sk, skb))) {
+ } else if (unlikely(sk_add_backlog(sk, skb,
+ sk->sk_rcvbuf + sk->sk_sndbuf))) {
bh_unlock_sock(sk);
NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
goto discard_and_relse;
@@ -1875,64 +1889,15 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
static int tcp_v4_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
-
- skb_queue_head_init(&tp->out_of_order_queue);
- tcp_init_xmit_timers(sk);
- tcp_prequeue_init(tp);
- icsk->icsk_rto = TCP_TIMEOUT_INIT;
- tp->mdev = TCP_TIMEOUT_INIT;
-
- /* So many TCP implementations out there (incorrectly) count the
- * initial SYN frame in their delayed-ACK and congestion control
- * algorithms that we must have the following bandaid to talk
- * efficiently to them. -DaveM
- */
- tp->snd_cwnd = TCP_INIT_CWND;
-
- /* See draft-stevens-tcpca-spec-01 for discussion of the
- * initialization of these values.
- */
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = TCP_MSS_DEFAULT;
-
- tp->reordering = sysctl_tcp_reordering;
- icsk->icsk_ca_ops = &tcp_init_congestion_ops;
-
- sk->sk_state = TCP_CLOSE;
-
- sk->sk_write_space = sk_stream_write_space;
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+ tcp_init_sock(sk);
icsk->icsk_af_ops = &ipv4_specific;
- icsk->icsk_sync_mss = tcp_sync_mss;
+
#ifdef CONFIG_TCP_MD5SIG
- tp->af_specific = &tcp_sock_ipv4_specific;
+ tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif
- /* TCP Cookie Transactions */
- if (sysctl_tcp_cookie_size > 0) {
- /* Default, cookies without s_data_payload. */
- tp->cookie_values =
- kzalloc(sizeof(*tp->cookie_values),
- sk->sk_allocation);
- if (tp->cookie_values != NULL)
- kref_init(&tp->cookie_values->kref);
- }
- /* Presumed zeroed, in order of appearance:
- * cookie_in_always, cookie_out_never,
- * s_data_constant, s_data_in, s_data_out
- */
- sk->sk_sndbuf = sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
-
- local_bh_disable();
- sock_update_memcg(sk);
- sk_sockets_allocated_inc(sk);
- local_bh_enable();
-
return 0;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7ac6423117ad..7b7cf3811348 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -563,13 +563,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
-static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_cookie_values *cvp = tp->cookie_values;
- unsigned remaining = MAX_TCP_OPTION_SPACE;
+ unsigned int remaining = MAX_TCP_OPTION_SPACE;
u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
tcp_cookie_size_check(cvp->cookie_desired) :
0;
@@ -663,15 +663,15 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
/* Set up TCP options for SYN-ACKs. */
-static unsigned tcp_synack_options(struct sock *sk,
+static unsigned int tcp_synack_options(struct sock *sk,
struct request_sock *req,
- unsigned mss, struct sk_buff *skb,
+ unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5,
struct tcp_extend_values *xvp)
{
struct inet_request_sock *ireq = inet_rsk(req);
- unsigned remaining = MAX_TCP_OPTION_SPACE;
+ unsigned int remaining = MAX_TCP_OPTION_SPACE;
u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
xvp->cookie_plus :
0;
@@ -742,13 +742,13 @@ static unsigned tcp_synack_options(struct sock *sk,
/* Compute TCP options for ESTABLISHED sockets. This is not the
* final wire format yet.
*/
-static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5)
{
struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
struct tcp_sock *tp = tcp_sk(sk);
- unsigned size = 0;
+ unsigned int size = 0;
unsigned int eff_sacks;
#ifdef CONFIG_TCP_MD5SIG
@@ -770,9 +770,9 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
- const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
+ const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
opts->num_sack_blocks =
- min_t(unsigned, eff_sacks,
+ min_t(unsigned int, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
TCPOLEN_SACK_PERBLOCK);
size += TCPOLEN_SACK_BASE_ALIGNED +
@@ -801,7 +801,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct tcp_sock *tp;
struct tcp_skb_cb *tcb;
struct tcp_out_options opts;
- unsigned tcp_options_size, tcp_header_size;
+ unsigned int tcp_options_size, tcp_header_size;
struct tcp_md5sig_key *md5;
struct tcphdr *th;
int err;
@@ -1259,7 +1259,7 @@ unsigned int tcp_current_mss(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
const struct dst_entry *dst = __sk_dst_get(sk);
u32 mss_now;
- unsigned header_len;
+ unsigned int header_len;
struct tcp_out_options opts;
struct tcp_md5sig_key *md5;
@@ -1390,7 +1390,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
*/
static inline int tcp_nagle_check(const struct tcp_sock *tp,
const struct sk_buff *skb,
- unsigned mss_now, int nonagle)
+ unsigned int mss_now, int nonagle)
{
return skb->len < mss_now &&
((nonagle & TCP_NAGLE_CORK) ||
@@ -2562,7 +2562,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
EXPORT_SYMBOL(tcp_make_synack);
/* Do all connect socket setups that can be done AF independent. */
-static void tcp_connect_init(struct sock *sk)
+void tcp_connect_init(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2617,9 +2617,12 @@ static void tcp_connect_init(struct sock *sk)
tp->snd_una = tp->write_seq;
tp->snd_sml = tp->write_seq;
tp->snd_up = tp->write_seq;
- tp->rcv_nxt = 0;
- tp->rcv_wup = 0;
- tp->copied_seq = 0;
+ tp->snd_nxt = tp->write_seq;
+
+ if (likely(!tp->repair))
+ tp->rcv_nxt = 0;
+ tp->rcv_wup = tp->rcv_nxt;
+ tp->copied_seq = tp->rcv_nxt;
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
inet_csk(sk)->icsk_retransmits = 0;
@@ -2642,7 +2645,6 @@ int tcp_connect(struct sock *sk)
/* Reserve space for headers. */
skb_reserve(buff, MAX_TCP_HEADER);
- tp->snd_nxt = tp->write_seq;
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
TCP_ECN_send_syn(sk, buff);
@@ -2791,6 +2793,15 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
+void tcp_send_window_probe(struct sock *sk)
+{
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+ tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
+ tcp_xmit_probe_skb(sk, 0);
+ }
+}
+
/* Initiate keepalive or window probe from timer. */
int tcp_write_wakeup(struct sock *sk)
{
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index a981cdc0a6e9..4526fe68e60e 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -91,7 +91,7 @@ static inline int tcp_probe_avail(void)
* Note: arguments must match tcp_rcv_established()!
*/
static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
+ struct tcphdr *th, unsigned int len)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
@@ -138,7 +138,7 @@ static struct jprobe tcp_jprobe = {
.entry = jtcp_rcv_established,
};
-static int tcpprobe_open(struct inode * inode, struct file * file)
+static int tcpprobe_open(struct inode *inode, struct file *file)
{
/* Reset (empty) log */
spin_lock_bh(&tcp_probe.lock);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fe141052a1be..279fd0846302 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -107,6 +107,7 @@
#include <net/checksum.h>
#include <net/xfrm.h>
#include <trace/events/udp.h>
+#include <linux/static_key.h>
#include "udp_impl.h"
struct udp_table udp_table __read_mostly;
@@ -206,7 +207,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
if (!snum) {
int low, high, remaining;
- unsigned rand;
+ unsigned int rand;
unsigned short first, last;
DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
@@ -846,7 +847,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
* Get and verify the address.
*/
if (msg->msg_name) {
- struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name;
+ struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET) {
@@ -1379,6 +1380,14 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
+static struct static_key udp_encap_needed __read_mostly;
+void udp_encap_enable(void)
+{
+ if (!static_key_enabled(&udp_encap_needed))
+ static_key_slow_inc(&udp_encap_needed);
+}
+EXPORT_SYMBOL(udp_encap_enable);
+
/* returns:
* -1: error
* 0: success
@@ -1400,7 +1409,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto drop;
nf_reset(skb);
- if (up->encap_type) {
+ if (static_key_false(&udp_encap_needed) && up->encap_type) {
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
/*
@@ -1470,7 +1479,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto drop;
- if (sk_rcvqueues_full(sk, skb))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
goto drop;
rc = 0;
@@ -1479,7 +1488,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
rc = __udp_queue_rcv_skb(sk, skb);
- else if (sk_add_backlog(sk, skb)) {
+ else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
bh_unlock_sock(sk);
goto drop;
}
@@ -1760,6 +1769,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
/* FALLTHROUGH */
case UDP_ENCAP_L2TPINUDP:
up->encap_type = val;
+ udp_encap_enable();
break;
default:
err = -ENOPROTOOPT;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index aaad650d47d9..5a681e298b90 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len);
extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
-extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
+extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
extern void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a0b4c5da8d43..0d3426cb5c4f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -152,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
case IPPROTO_AH:
if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
- __be32 *ah_hdr = (__be32*)xprth;
+ __be32 *ah_hdr = (__be32 *)xprth;
fl4->fl4_ipsec_spi = ah_hdr[1];
}
@@ -298,8 +298,8 @@ void __init xfrm4_init(int rt_max_size)
xfrm4_state_init();
xfrm4_policy_init();
#ifdef CONFIG_SYSCTL
- sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
- xfrm4_policy_table);
+ sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4",
+ xfrm4_policy_table);
#endif
}