diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 1 | ||||
-rw-r--r-- | net/ipv4/arp.c | 12 | ||||
-rw-r--r-- | net/ipv4/cipso_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 33 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 42 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 11 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 9 | ||||
-rw-r--r-- | net/ipv4/ping.c | 5 | ||||
-rw-r--r-- | net/ipv4/raw.c | 6 | ||||
-rw-r--r-- | net/ipv4/route.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_metrics.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_probe.c | 4 | ||||
-rw-r--r-- | net/ipv4/udp.c | 5 |
17 files changed, 131 insertions, 48 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e0878c3f66a8..91a2557942fa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -187,6 +187,7 @@ config NET_IPGRE_DEMUX config NET_IP_TUNNEL tristate select DST_CACHE + select GRO_CELLS default n config NET_IPGRE diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 89a8cac4726a..51b27ae09fbd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1263,7 +1263,7 @@ void __init arp_init(void) /* * ax25 -> ASCII conversion */ -static char *ax2asc2(ax25_address *a, char *buf) +static void ax2asc2(ax25_address *a, char *buf) { char c, *s; int n; @@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf) *s++ = n + '0'; *s++ = '\0'; - if (*buf == '\0' || *buf == '-') - return "*"; - - return buf; + if (*buf == '\0' || *buf == '-') { + buf[0] = '*'; + buf[1] = '\0'; + } } #endif /* CONFIG_AX25 */ @@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, } #endif sprintf(tbuf, "%pI4", n->primary_key); - seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 72d6f056d863..ae206163c273 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1587,6 +1587,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) goto validate_return_locked; } + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto validate_return_locked; + } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6306a67880e8..317026a39cfa 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1355,6 +1355,36 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) return ret; } +static int call_fib_nh_notifiers(struct fib_nh *fib_nh, + enum fib_event_type event_type) +{ + struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); + struct fib_nh_notifier_info info = { + .fib_nh = fib_nh, + }; + + switch (event_type) { + case FIB_EVENT_NH_ADD: + if (fib_nh->nh_flags & RTNH_F_DEAD) + break; + if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) + break; + return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, + &info.info); + case FIB_EVENT_NH_DEL: + if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) || + (fib_nh->nh_flags & RTNH_F_DEAD)) + return call_fib_notifiers(dev_net(fib_nh->nh_dev), + event_type, &info.info); + default: + break; + } + + return NOTIFY_DONE; +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host @@ -1396,6 +1426,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; break; } + call_fib_nh_notifiers(nexthop_nh, + FIB_EVENT_NH_DEL); dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1550,6 +1582,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) continue; alive++; nexthop_nh->nh_flags &= ~nh_flags; + call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); } endfor_nexthops(fi) if (alive > 0) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2919d1a10cfd..d8cea210af0e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -124,7 +124,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb, static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id, u32 nlflags) + u8 tos, u8 type, u32 tb_id) { struct fib_entry_notifier_info info = { .dst = dst, @@ -133,7 +133,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .tos = tos, .type = type, .tb_id = tb_id, - .nlflags = nlflags, }; return call_fib_notifier(nb, net, event_type, &info.info); } @@ -197,7 +196,7 @@ int call_fib_notifiers(struct net *net, enum fib_event_type event_type, static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id, u32 nlflags) + u8 tos, u8 type, u32 tb_id) { struct fib_entry_notifier_info info = { .dst = dst, @@ -206,7 +205,6 @@ static int call_fib_entry_notifiers(struct net *net, .tos = tos, .type = type, .tb_id = tb_id, - .nlflags = nlflags, }; return call_fib_notifiers(net, event_type, &info.info); } @@ -1198,6 +1196,7 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, int fib_table_insert(struct net *net, struct fib_table *tb, struct fib_config *cfg) { + enum fib_event_type event = FIB_EVENT_ENTRY_ADD; struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; @@ -1295,6 +1294,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, + key, plen, fi, + new_fa->fa_tos, cfg->fc_type, + tb->tb_id); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, + tb->tb_id, &cfg->fc_nlinfo, nlflags); + hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1303,13 +1309,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, - key, plen, fi, - new_fa->fa_tos, cfg->fc_type, - tb->tb_id, cfg->fc_nlflags); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, - tb->tb_id, &cfg->fc_nlinfo, nlflags); - goto succeeded; } /* Error if we find a perfect match which @@ -1319,10 +1318,12 @@ int fib_table_insert(struct net *net, struct fib_table *tb, if (fa_match) goto out; - if (cfg->fc_nlflags & NLM_F_APPEND) + if (cfg->fc_nlflags & NLM_F_APPEND) { + event = FIB_EVENT_ENTRY_APPEND; nlflags |= NLM_F_APPEND; - else + } else { fa = fa_first; + } } err = -ENOENT; if (!(cfg->fc_nlflags & NLM_F_CREATE)) @@ -1351,8 +1352,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb, tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, - cfg->fc_type, tb->tb_id, cfg->fc_nlflags); + call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type, + tb->tb_id); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1653,8 +1654,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb, return -ESRCH; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, - fa_to_delete->fa_info, tos, cfg->fc_type, - tb->tb_id, 0); + fa_to_delete->fa_info, tos, + fa_to_delete->fa_type, tb->tb_id); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1963,7 +1964,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { + if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || + tb->tb_id != fa->tb_id) { slen = fa->fa_slen; continue; } @@ -1972,7 +1974,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb) n->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, - tb->tb_id, 0); + tb->tb_id); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -2012,7 +2014,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, call_fib_entry_notifier(nb, net, event_type, l->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, - fa->fa_type, fa->tb_id, 0); + fa->fa_type, fa->tb_id); } } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5b15459955f8..44fd86de2823 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1172,6 +1172,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); + kfree(pmc); } spin_unlock_bh(&im->lock); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b67719f45953..737ce826d7ec 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -222,7 +222,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) { - int res = dst_neigh_output(dst, neigh, skb); + int res; + + sock_confirm_neigh(skb, neigh); + res = neigh_output(neigh, skb); rcu_read_unlock_bh(); return res; @@ -886,6 +889,9 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->csum = 0; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -1086,6 +1092,9 @@ alloc_new_skb: exthdrlen = 0; csummode = CHECKSUM_NONE; + if ((flags & MSG_CONFIRM) && !skb_prev) + skb_set_dst_pending_confirm(skb, 1); + /* * Put the packet on the pending queue. */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a4409dd390a..ce1386a67e24 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1243,7 +1243,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + /* We need to keep the dst for __ip_options_echo() + * We could restrict the test to opt.ts_needtime || opt.srr, + * but the following is good enough as IP options are not often used. + */ + if (unlikely(IPCB(skb)->opt.optlen)) + skb_dst_force(skb); + else + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 592db6a3a0e9..2af6244b83e2 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -642,6 +642,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + if (!skb) + return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); @@ -848,7 +850,8 @@ out: return err; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4e49e5cb001c..8119e1f66e03 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -383,6 +383,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + skb->transport_header = skb->network_header; err = -EFAULT; if (memcpy_from_msg(iph, msg, length)) @@ -666,7 +669,8 @@ out: return len; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b7c231c1aef..cb494a5050f7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -154,6 +154,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -168,6 +169,7 @@ static struct dst_ops ipv4_dst_ops = { .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, + .confirm_neigh = ipv4_confirm_neigh, }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -461,6 +463,23 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + const __be32 *pkey = daddr; + const struct rtable *rt; + + rt = (const struct rtable *)dst; + if (rt->rt_gateway) + pkey = (const __be32 *)&rt->rt_gateway; + else if (!daddr || + (rt->rt_flags & + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) + return; + + __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); +} + #define IP_IDENTS_SZ 2048u static atomic_t *ip_idents __read_mostly; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b751abc56935..d44a6989e76d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -773,6 +773,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } + /* if __tcp_splice_read() got nothing while we have + * an skb in receive queue, we do not want to loop. + * This might happen with URG data. + */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 27c95acbb52f..2c0ff327b6df 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3644,11 +3644,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { - struct dst_entry *dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - } + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) + sk_dst_confirm(sk); if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); @@ -5995,7 +5992,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) break; case TCP_FIN_WAIT1: { - struct dst_entry *dst; int tmo; /* If we enter the TCP_FIN_WAIT1 state and we are a @@ -6022,9 +6018,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_FIN_WAIT2); sk->sk_shutdown |= SEND_SHUTDOWN; - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); + sk_dst_confirm(sk); if (!sock_flag(sk, SOCK_DEAD)) { /* Wake up lingering close() */ diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b9ed0d50aead..0f46e5fe31ad 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -375,12 +375,10 @@ void tcp_update_metrics(struct sock *sk) u32 val; int m; + sk_dst_confirm(sk); if (sysctl_tcp_nometrics_save || !dst) return; - if (dst->flags & DST_HOST) - dst_confirm(dst); - rcu_read_lock(); if (icsk->icsk_backoff || !tp->srtt_us) { /* This session failed to estimate rtt. Why? @@ -493,11 +491,10 @@ void tcp_init_metrics(struct sock *sk) struct tcp_metrics_block *tm; u32 val, crtt = 0; /* cached RTT scaled by 8 */ + sk_dst_confirm(sk); if (!dst) goto reset; - dst_confirm(dst); - rcu_read_lock(); tm = tcp_get_metrics(sk, dst, true); if (!tm) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ccf1ef4dcba4..61f272a99a49 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -980,6 +980,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); + skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; th->source = inet->inet_sport; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..3d063eb37848 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - spin_lock(&tcp_probe.lock); + spin_lock_bh(&tcp_probe.lock); /* If log fills, just silently drop */ if (tcp_probe_avail() > 1) { struct tcp_log *p = tcp_probe.log + tcp_probe.head; @@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); + spin_unlock_bh(&tcp_probe.lock); wake_up(&tcp_probe.wait); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf6ba3387401..ea6e4cff9faf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1101,7 +1101,8 @@ out: return err; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; @@ -1489,7 +1490,7 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } |