summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/inet_connection_sock.c14
-rw-r--r--net/ipv4/ip_gre.c13
-rw-r--r--net/ipv4/ip_output.c5
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel.c23
-rw-r--r--net/ipv4/ping.c4
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c77
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_ipv4.c31
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/udp_tunnel.c2
17 files changed, 148 insertions, 52 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebd9d31e65a..f6303b17546b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1847,7 +1847,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 05e4cba14162..b3086cf27027 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -356,9 +356,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 46b9c887bede..64148914803a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -789,14 +789,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
reqsk_put(req);
}
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
- struct sock *child)
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+ struct request_sock *req,
+ struct sock *child)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
spin_lock(&queue->rskq_lock);
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_child_forget(sk, req, child);
+ child = NULL;
} else {
req->sk = child;
req->dl_next = NULL;
@@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
sk_acceptq_added(sk);
}
spin_unlock(&queue->rskq_lock);
+ return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
@@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
if (own_req) {
inet_csk_reqsk_queue_drop(sk, req);
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
- inet_csk_reqsk_queue_add(sk, req, child);
- /* Warning: caller must not call reqsk_put(req);
- * child stole last reference on it.
- */
- return child;
+ if (inet_csk_reqsk_queue_add(sk, req, child))
+ return child;
}
/* Too bad, another child took ownership of the request, undo. */
bh_unlock_sock(child);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7c51c4e1661f..41ba68de46d8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1054,8 +1054,9 @@ static const struct net_device_ops gre_tap_netdev_ops = {
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->netdev_ops = &gre_tap_netdev_ops;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip_tunnel_setup(dev, gre_tap_net_id);
}
@@ -1240,6 +1241,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
err = ipgre_newlink(net, dev, tb, NULL);
if (err < 0)
goto out;
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto out;
+
return dev;
out:
free_netdev(dev);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 64878efa045c..565bf64b2b7d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1236,13 +1236,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (!skb)
return -EINVAL;
- cork->length += size;
if ((size + skb->len > mtu) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return -EOPNOTSUPP;
+
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
}
+ cork->length += size;
while (size > 0) {
if (skb_is_gso(skb)) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5f73a7c03e27..a50124260f5a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+ /* Our caller is responsible for freeing ipc->opt */
err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
err < 40 ? err : 40);
if (err)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index c7bd72e9b544..336e6892a93c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -661,6 +661,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
dst = tnl_params->daddr;
if (dst == 0) {
/* NBMA tunnel */
@@ -758,7 +760,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
tunnel->err_count--;
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst_link_failure(skb);
} else
tunnel->err_count = 0;
@@ -943,17 +944,31 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+ if (new_mtu < 68)
return -EINVAL;
+
+ if (new_mtu > max_mtu) {
+ if (strict)
+ return -EINVAL;
+
+ new_mtu = max_mtu;
+ }
+
dev->mtu = new_mtu;
return 0;
}
+EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ return __ip_tunnel_change_mtu(dev, new_mtu, true);
+}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c117b21b937d..d3a27165f9cc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bc35f1842512..7113bae4e6a0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -547,8 +547,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(net, msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
goto out;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85f184e429c6..02c62299d717 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
+static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
/*
* Interface to generic destination cache.
*/
@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, 0);
+ 0, jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash;
+ struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+ u32 hval = fnhe_hashfun(daddr);
+
+ spin_lock_bh(&fnhe_lock);
+
+ hash = rcu_dereference_protected(nh->nh_exceptions,
+ lockdep_is_held(&fnhe_lock));
+ hash += hval;
+
+ fnhe_p = &hash->chain;
+ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+ while (fnhe) {
+ if (fnhe->fnhe_daddr == daddr) {
+ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ fnhe_flush_routes(fnhe);
+ kfree_rcu(fnhe, rcu);
+ break;
+ }
+ fnhe_p = &fnhe->fnhe_next;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+ lockdep_is_held(&fnhe_lock));
+ }
+
+ spin_unlock_bh(&fnhe_lock);
+}
+
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buff *skb,
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- if (fnhe)
+ if (fnhe) {
rth = rcu_dereference(fnhe->fnhe_rth_input);
- else
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
+ }
+ }
+
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+rt_cache:
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
struct fib_nh *nh = &FIB_RES_NH(*res);
fnhe = find_exception(nh, fl4->daddr);
- if (fnhe)
+ if (fnhe) {
prth = &fnhe->fnhe_rth_output;
- else {
- if (unlikely(fl4->flowi4_flags &
- FLOWI_FLAG_KNOWN_NH &&
- !(nh->nh_gw &&
- nh->nh_scope == RT_SCOPE_LINK))) {
- do_cache = false;
- goto add;
+ rth = rcu_dereference(*prth);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(nh, fl4->daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
}
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}
+
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
+ }
+ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);
+
+rt_cache:
if (rt_cache_valid(rth)) {
dst_hold(&rth->dst);
return rth;
@@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
}
#ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 19746b3fcbbe..483ffdf5aa4d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -940,7 +940,7 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+ if (!can_coalesce && i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1213,7 +1213,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == MAX_SKB_FRAGS || !sg) {
+ if (i == sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -2950,7 +2950,7 @@ static void __tcp_alloc_md5sig_pool(void)
struct crypto_hash *hash;
hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
+ if (IS_ERR(hash))
return;
per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1c2a73406261..3b2c8e90a475 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2896,7 +2896,10 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+ struct rtt_meas rttm = {
+ .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+ .ts = now,
+ };
u32 elapsed;
/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a4d523709ab3..487ac67059e2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -311,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
-void tcp_req_err(struct sock *sk, u32 seq)
+void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
struct request_sock *req = inet_reqsk(sk);
struct net *net = sock_net(sk);
@@ -323,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq)
if (seq != tcp_rsk(req)->snt_isn) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- } else {
+ } else if (abort) {
/*
* Still in SYN_RECV, just remove it silently.
* There is no good way to pass the error to the newly
@@ -383,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq,
+ type == ICMP_PARAMETERPROB ||
+ type == ICMP_TIME_EXCEEDED ||
+ (type == ICMP_DEST_UNREACH &&
+ (code == ICMP_NET_UNREACH ||
+ code == ICMP_HOST_UNREACH)));
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -1592,28 +1597,30 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b4b792..a726d7853ce5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -550,7 +550,7 @@ reset:
*/
if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt /= 8 * USEC_PER_MSEC;
+ crtt /= 8 * USEC_PER_SEC / HZ;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
} else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75632a925824..9b02af2139d3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->segs_in = 0;
+ newtp->segs_in = 1;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -815,6 +815,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index be0b21852b13..95d2f198017e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1048,8 +1048,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc,
sk->sk_family == AF_INET6);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
connected = 0;
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 0ec08814f37d..96599d1a1318 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -89,6 +89,8 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
uh->source = src_port;
uh->len = htons(skb->len);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
udp_set_csum(nocheck, skb, src, dst, skb->len);
iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);