diff options
Diffstat (limited to 'net')
68 files changed, 824 insertions, 469 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ea71513fca21..90f49a194249 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -119,6 +119,15 @@ static int br_dev_init(struct net_device *dev) return err; } +static void br_dev_uninit(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + br_multicast_uninit_stats(br); + br_vlan_flush(br); + free_percpu(br->stats); +} + static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -332,6 +341,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_init = br_dev_init, + .ndo_uninit = br_dev_uninit, .ndo_start_xmit = br_dev_xmit, .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, @@ -356,14 +366,6 @@ static const struct net_device_ops br_netdev_ops = { .ndo_features_check = passthru_features_check, }; -static void br_dev_free(struct net_device *dev) -{ - struct net_bridge *br = netdev_priv(dev); - - free_percpu(br->stats); - free_netdev(dev); -} - static struct device_type br_type = { .name = "bridge", }; @@ -376,7 +378,7 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = br_dev_free; + dev->destructor = free_netdev; dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8ac1770aa222..56a2a72e7738 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -311,7 +311,6 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) br_fdb_delete_by_port(br, NULL, 0, 1); - br_vlan_flush(br); br_multicast_dev_del(br); cancel_delayed_work_sync(&br->gc_work); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b760f2620abf..faa7261a992f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2031,8 +2031,6 @@ void br_multicast_dev_del(struct net_bridge *br) out: spin_unlock_bh(&br->multicast_lock); - - free_percpu(br->mcast_stats); } int br_multicast_set_router(struct net_bridge *br, unsigned long val) @@ -2531,6 +2529,11 @@ int br_multicast_init_stats(struct net_bridge *br) return 0; } +void br_multicast_uninit_stats(struct net_bridge *br) +{ + free_percpu(br->mcast_stats); +} + static void mcast_stats_add_dir(u64 *dst, u64 *src) { dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX]; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a8f6acd23e30..225ef7d53701 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1165,11 +1165,14 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, spin_unlock_bh(&br->lock); } - err = br_changelink(dev, tb, data); + err = register_netdevice(dev); if (err) return err; - return register_netdevice(dev); + err = br_changelink(dev, tb, data); + if (err) + unregister_netdevice(dev); + return err; } static size_t br_get_size(const struct net_device *brdev) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 61368186edea..0d177280aa84 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -620,6 +620,7 @@ void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir); int br_multicast_init_stats(struct net_bridge *br); +void br_multicast_uninit_stats(struct net_bridge *br); void br_multicast_get_stats(const struct net_bridge *br, const struct net_bridge_port *p, struct br_mcast_stats *dest); @@ -760,6 +761,10 @@ static inline int br_multicast_init_stats(struct net_bridge *br) return 0; } +static inline void br_multicast_uninit_stats(struct net_bridge *br) +{ +} + static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return 0; diff --git a/net/core/datagram.c b/net/core/datagram.c index ea633342ab0d..f4947e737f34 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -398,7 +398,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len) { int start = skb_headlen(skb); - int i, copy = start - offset; + int i, copy = start - offset, start_off = offset, n; struct sk_buff *frag_iter; trace_skb_copy_datagram_iovec(skb, len); @@ -407,11 +407,12 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - if (copy_to_iter(skb->data + offset, copy, to) != copy) + n = copy_to_iter(skb->data + offset, copy, to); + offset += n; + if (n != copy) goto short_copy; if ((len -= copy) == 0) return 0; - offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -425,13 +426,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - if (copy_page_to_iter(skb_frag_page(frag), + n = copy_page_to_iter(skb_frag_page(frag), frag->page_offset + offset - - start, copy, to) != copy) + start, copy, to); + offset += n; + if (n != copy) goto short_copy; if (!(len -= copy)) return 0; - offset += copy; } start = end; } @@ -463,6 +465,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, */ fault: + iov_iter_revert(to, offset - start_off); return -EFAULT; short_copy: @@ -613,7 +616,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, __wsum *csump) { int start = skb_headlen(skb); - int i, copy = start - offset; + int i, copy = start - offset, start_off = offset; struct sk_buff *frag_iter; int pos = 0; int n; @@ -623,11 +626,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, if (copy > len) copy = len; n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to); + offset += n; if (n != copy) goto fault; if ((len -= copy) == 0) return 0; - offset += copy; pos = copy; } @@ -649,12 +652,12 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, offset - start, copy, &csum2, to); kunmap(page); + offset += n; if (n != copy) goto fault; *csump = csum_block_add(*csump, csum2, pos); if (!(len -= copy)) return 0; - offset += copy; pos += copy; } start = end; @@ -687,6 +690,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, return 0; fault: + iov_iter_revert(to, offset - start_off); return -EFAULT; } @@ -771,6 +775,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, } return 0; csum_error: + iov_iter_revert(&msg->msg_iter, chunk); return -EINVAL; fault: return -EFAULT; diff --git a/net/core/dev.c b/net/core/dev.c index 7869ae3837ca..533a6d6f6092 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6757,7 +6757,6 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) return err; } -EXPORT_SYMBOL(dev_change_xdp_fd); /** * dev_new_index - allocate an ifindex diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c35aae13c8d2..d98d4998213d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -390,7 +390,7 @@ mpls: unsigned char ar_tip[4]; } *arp_eth, _arp_eth; const struct arphdr *arp; - struct arphdr *_arp; + struct arphdr _arp; arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, hlen, &_arp); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e7c12caa20c8..4526cbd7e28a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour *neigh) if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); + if (neigh->ops->solicit) + neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); kfree_skb(skb); } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 758f140b6bed..d28da7d363f1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -20,9 +20,11 @@ #include <net/tcp.h> static siphash_key_t net_secret __read_mostly; +static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { + net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } #endif @@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) +static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +{ + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, + }; + + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash(&combined, offsetofend(typeof(combined), daddr), + &ts_secret); +} + u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport, u32 *tsoff) { @@ -63,7 +82,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_sequence_number); @@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET +static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +{ + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash_2u32((__force u32)saddr, (__force u32)daddr, + &ts_secret); +} /* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, @@ -103,7 +130,7 @@ u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f781092fda9..35c1e2460206 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3807,6 +3807,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; serr->opt_stats = opt_stats; + serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4ead336e14ea..7f9cc400eca0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ebd953bc5607..1d46d05efb0f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -488,16 +488,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk, return false; /* Support IP_PKTINFO on tstamp packets if requested, to correlate - * timestamp with egress dev. Not possible for packets without dev + * timestamp with egress dev. Not possible for packets without iif * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). */ - if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || - (!skb->dev)) + info = PKTINFO_SKB_CB(skb); + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || + !info->ipi_ifindex) return false; - info = PKTINFO_SKB_CB(skb); info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; - info->ipi_ifindex = skb->dev->ifindex; return true; } @@ -591,6 +590,7 @@ static bool setsockopt_needs_rtnl(int optname) case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_UNBLOCK_SOURCE: + case IP_ROUTER_ALERT: return true; } return false; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index fd9f34bbd740..dfb2ab2dd3c8 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c0317c940bcd..b036e85e093b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1278,7 +1278,7 @@ static void mrtsock_destruct(struct sock *sk) struct net *net = sock_net(sk); struct mr_table *mrt; - rtnl_lock(); + ASSERT_RTNL(); ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; @@ -1289,7 +1289,6 @@ static void mrtsock_destruct(struct sock *sk) mroute_clean_tables(mrt, false); } } - rtnl_unlock(); } /* Socket options and virtual interface manipulation. The whole @@ -1353,13 +1352,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (sk != rcu_access_pointer(mrt->mroute_sk)) { ret = -EACCES; } else { - /* We need to unlock here because mrtsock_destruct takes - * care of rtnl itself and we can't change that due to - * the IP_ROUTER_ALERT setsockopt which runs without it. - */ - rtnl_unlock(); ret = ip_ra_control(sk, 0, NULL); - goto out; + goto out_unlock; } break; case MRT_ADD_VIF: @@ -1470,7 +1464,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, } out_unlock: rtnl_unlock(); -out: return ret; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 52f26459efc3..9b8841316e7b 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -461,7 +461,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) clusterip_config_put(cipinfo->config); - nf_ct_netns_get(par->net, par->family); + nf_ct_netns_put(par->net, par->family); } #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index c9b52c361da2..53e49f5011d3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1288,22 +1278,16 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2af6244b83e2..ccfbce13a633 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -156,17 +156,18 @@ int ping_hash(struct sock *sk) void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + write_lock_bh(&ping_table.lock); if (sk_hashed(sk)) { - write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&ping_table.lock); } + write_unlock_bh(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8119e1f66e03..9d943974de2b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -682,7 +682,9 @@ static void raw_close(struct sock *sk, long timeout) /* * Raw sockets may have direct kernel references. Kill them. */ + rtnl_lock(); ip_ra_control(sk, 0, NULL); + rtnl_unlock(); sk_common_release(sk); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8471dd116771..acd69cfe2951 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2620,7 +2620,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) skb_reset_network_header(skb); /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ - ip_hdr(skb)->protocol = IPPROTO_ICMP; + ip_hdr(skb)->protocol = IPPROTO_UDP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1e319a525d51..40ba4249a586 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2322,6 +2322,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + tcp_saved_syn_free(tp); /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c43119726a62..659d1baefb2b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ -static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, + unsigned int len) { static bool __once __read_mostly; @@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); - pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", - dev ? dev->name : "Unknown driver"); + if (!dev || len >= dev->mtu) + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); rcu_read_unlock(); } } @@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); - if (unlikely(icsk->icsk_ack.rcv_mss != len)) - tcp_gro_dev_warn(sk, skb); + /* Account for possibly-removed options */ + if (unlikely(len > icsk->icsk_ack.rcv_mss + + MAX_TCP_OPTION_SPACE)) + tcp_gro_dev_warn(sk, skb, len); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. @@ -874,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { struct tcp_sock *tp = tcp_sk(sk); - if (metric > tp->reordering) { - int mib_idx; + int mib_idx; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -902,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } tp->rack.reord = 1; + + /* This exciting event is worth to be remembered. 8) */ + if (ts) + mib_idx = LINUX_MIB_TCPTSREORDER; + else if (tcp_is_reno(tp)) + mib_idx = LINUX_MIB_TCPRENOREORDER; + else if (tcp_is_fack(tp)) + mib_idx = LINUX_MIB_TCPFACKREORDER; + else + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS(sock_net(sk), mib_idx); } /* This must be called before lost_out is incremented */ @@ -1930,6 +1935,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sk_buff *skb; + bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ bool mark_lost; @@ -1989,15 +1995,18 @@ void tcp_enter_loss(struct sock *sk) tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); - /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO - * if a previous recovery is underway, otherwise it may incorrectly - * call a timeout spurious if some previously retransmitted packets - * are s/acked (sec 3.2). We do not apply that retriction since - * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS - * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO - * on PTMU discovery to avoid sending new data. + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + * + * In theory F-RTO can be used repeatedly during loss recovery. + * In practice this interacts badly with broken middle-boxes that + * falsely raise the receive window, which results in repeated + * timeouts and stop-and-go behavior. */ - tp->frto = sysctl_tcp_frto && !inet_csk(sk)->icsk_mtup.probe_size; + tp->frto = sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 22548b5f05cb..c3c082ed3879 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2999,6 +2999,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) { struct sk_buff *skb; + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); + /* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { @@ -3014,8 +3016,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Send it off. */ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); - - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); } /* Send a crossed SYN-ACK during socket establishment. diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 4ecb38ae8504..d8acbd9f477a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) /* Account for retransmits that are lost again */ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); } } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 363172527e43..80ce478c4851 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3626,14 +3626,19 @@ restart: INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { struct rt6_info *rt = NULL; + bool keep; addrconf_del_dad_work(ifa); + keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && + !addr_is_local(&ifa->addr); + if (!keep) + list_move(&ifa->if_list, &del_list); + write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); - if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && - !addr_is_local(&ifa->addr)) { + if (keep) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; ifa->state = 0; @@ -3645,8 +3650,6 @@ restart: } else { state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; - - list_move(&ifa->if_list, &del_list); } spin_unlock_bh(&ifa->lock); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index eec27f87efac..e011122ebd43 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) * At one point, excluding local errors was a quick test to identify icmp/icmp6 * errors. This is no longer true, but the test remained, so the v6 stack, * unlike v4, also honors cmsg requests on all wifi and timestamp errors. - * - * Timestamp code paths do not initialize the fields expected by cmsg: - * the PKTINFO fields in skb->cb[]. Fill those in here. */ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, struct sock_exterr_skb *serr) @@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) return false; - if (!skb->dev) + if (!IP6CB(skb)->iif) return false; - if (skb->protocol == htons(ETH_P_IPV6)) - IP6CB(skb)->iif = skb->dev->ifindex; - else - PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex; - return true; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacfb4bce153..c45b12b4431c 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -122,11 +122,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); /* * RFC4291 2.5.3 + * The loopback address must not be used as the source address in IPv6 + * packets that are sent outside of a single node. [..] * A packet received on an interface with a destination address * of loopback must be dropped. */ - if (!(dev->flags & IFF_LOOPBACK) && - ipv6_addr_loopback(&hdr->daddr)) + if ((ipv6_addr_loopback(&hdr->saddr) || + ipv6_addr_loopback(&hdr->daddr)) && + !(dev->flags & IFF_LOOPBACK)) goto err; /* RFC4291 Errata ID: 3480 diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 309062f3debe..31762f76cdb5 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_attach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_attach_ioctl(sock, &info); @@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_unattach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_unattach_ioctl(sock, &info); @@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct socket *newsock = NULL; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_clone(sock, &info, &newsock); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8adab6335ced..e37d9554da7b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -278,7 +278,57 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn } EXPORT_SYMBOL_GPL(l2tp_session_find); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +/* Like l2tp_session_find() but takes a reference on the returned session. + * Optionally calls session->ref() too if do_ref is true. + */ +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref) +{ + struct hlist_head *session_list; + struct l2tp_session *session; + + if (!tunnel) { + struct l2tp_net *pn = l2tp_pernet(net); + + session_list = l2tp_session_id_hash_2(pn, session_id); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, session_list, global_hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + rcu_read_unlock_bh(); + + return session; + } + } + rcu_read_unlock_bh(); + + return NULL; + } + + session_list = l2tp_session_id_hash(tunnel, session_id); + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, session_list, hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + read_unlock_bh(&tunnel->hlist_lock); + + return session; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_get); + +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref) { int hash; struct l2tp_session *session; @@ -288,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -298,12 +351,13 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -313,7 +367,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); rcu_read_unlock_bh(); + return session; } } @@ -323,7 +381,49 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); + +static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, + struct l2tp_session *session) +{ + struct l2tp_session *session_walk; + struct hlist_head *g_head; + struct hlist_head *head; + struct l2tp_net *pn; + + head = l2tp_session_id_hash(tunnel, session->session_id); + + write_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session_walk, head, hlist) + if (session_walk->session_id == session->session_id) + goto exist; + + if (tunnel->version == L2TP_HDR_VER_3) { + pn = l2tp_pernet(tunnel->l2tp_net); + g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net), + session->session_id); + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session_walk, g_head, global_hlist) + if (session_walk->session_id == session->session_id) + goto exist_glob; + + hlist_add_head_rcu(&session->global_hlist, g_head); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + } + + hlist_add_head(&session->hlist, head); + write_unlock_bh(&tunnel->hlist_lock); + + return 0; + +exist_glob: + spin_unlock_bh(&pn->l2tp_session_hlist_lock); +exist: + write_unlock_bh(&tunnel->hlist_lock); + + return -EEXIST; +} /* Lookup a tunnel by id */ @@ -633,6 +733,9 @@ discard: * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. + * + * session->ref() must have been called prior to l2tp_recv_common(). + * session->deref() will be called automatically after skb is processed. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -642,14 +745,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int offset; u32 ns, nr; - /* The ref count is increased since we now hold a pointer to - * the session. Take care to decrement the refcnt when exiting - * this function from now on... - */ - l2tp_session_inc_refcount(session); - if (session->ref) - (*session->ref)(session); - /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { @@ -802,8 +897,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); - l2tp_session_dec_refcount(session); - return; discard: @@ -812,8 +905,6 @@ discard: if (session->deref) (*session->deref)(session); - - l2tp_session_dec_refcount(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -920,8 +1011,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); if (!session || !session->recv_skb) { + if (session) { + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } + /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, "%s: no session found (%u/%u). Passing up.\n", @@ -930,6 +1027,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -1738,6 +1836,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; + int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1793,6 +1892,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); + err = l2tp_session_add_to_tunnel(tunnel, session); + if (err) { + kfree(session); + + return ERR_PTR(err); + } + /* Bump the reference count. The session context is deleted * only when this drops to zero. */ @@ -1802,28 +1908,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn /* Ensure tunnel socket isn't deleted */ sock_hold(tunnel->sock); - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - l2tp_session_id_hash(tunnel, session_id)); - write_unlock_bh(&tunnel->hlist_lock); - - /* And to the global session list if L2TPv3 */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head_rcu(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } - /* Ignore management session in session count value */ if (session->session_id != 0) atomic_inc(&l2tp_session_count); + + return session; } - return session; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index aebf281d09ee..8ce7818c7a9d 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -230,11 +230,16 @@ out: return tunnel; } +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref); struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref); +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 2d6760a2ae34..d100aed3d06f 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) } /* Show the tunnel or session context */ - if (pd->session == NULL) + if (!pd->session) { l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - else + } else { l2tp_dfs_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8bf18a5f66e0..6fd41d7afe1e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -221,12 +221,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - rc = -EEXIST; - goto out; - } - if (cfg->ifname) { dev = dev_get_by_name(net, cfg->ifname); if (dev) { @@ -240,8 +234,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, peer_session_id, cfg); - if (!session) { - rc = -ENOMEM; + if (IS_ERR(session)) { + rc = PTR_ERR(session); goto out; } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d25038cfd64e..4d322c1b7233 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -178,9 +179,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); @@ -202,6 +204,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a4abcbc4c09a..88b397c30d86 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); + return 0; pass_up: @@ -191,9 +193,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); @@ -215,6 +218,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3620fba31786..7e3e669baac4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, + bool do_ref) { u32 tunnel_id; u32 session_id; @@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_find_by_ifname(net, ifname); + session = l2tp_session_get_by_ifname(net, ifname, do_ref); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel) - session = l2tp_session_find(net, tunnel, session_id); + session = l2tp_session_get(net, tunnel, session_id, + do_ref); } return session; @@ -642,10 +644,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf session_id, peer_session_id, &cfg); if (ret >= 0) { - session = l2tp_session_find(net, tunnel, session_id); - if (session) + session = l2tp_session_get(net, tunnel, session_id, false); + if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); + l2tp_session_dec_refcount(session); + } } out: @@ -658,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, true); if (session == NULL) { ret = -ENODEV; goto out; @@ -672,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -681,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; goto out; @@ -716,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_MODIFY); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -811,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; - goto out; + goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err_ref; } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) - goto err_out; + goto err_ref_msg; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); + ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); -err_out: - nlmsg_free(msg); + l2tp_session_dec_refcount(session); -out: + return ret; + +err_ref_msg: + nlmsg_free(msg); +err_ref: + l2tp_session_dec_refcount(session); +err: return ret; } @@ -852,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_find_nth(tunnel, si); + session = l2tp_session_get_nth(tunnel, si, false); if (session == NULL) { ti++; tunnel = NULL; @@ -862,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) < 0) + session, L2TP_CMD_SESSION_GET) < 0) { + l2tp_session_dec_refcount(session); break; + } + l2tp_session_dec_refcount(session); si++; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 36cc56fd0418..32ea0f3d868c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session) static void pppol2tp_session_destruct(struct sock *sk) { struct l2tp_session *session = sk->sk_user_data; + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); @@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock) l2tp_session_queue_purge(session); sock_put(sk); } - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - release_sock(sk); /* This will delete the session context via @@ -582,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, int error = 0; u32 tunnel_id, peer_tunnel_id; u32 session_id, peer_session_id; + bool drop_refcnt = false; int ver = 2; int fd; @@ -683,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - /* Create session if it doesn't already exist. We handle the - * case where a session was previously created by the netlink - * interface by checking that the session doesn't already have - * a socket and its tunnel socket are what we expect. If any - * of those checks fail, return EEXIST to the caller. - */ - session = l2tp_session_find(sock_net(sk), tunnel, session_id); - if (session == NULL) { - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. + session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + if (session) { + drop_refcnt = true; + ps = l2tp_session_priv(session); + + /* Using a pre-existing session is fine as long as it hasn't + * been connected yet. */ - cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + if (ps->sock) { + error = -EEXIST; + goto end; + } - /* Allocate and initialize a new session context. */ - session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); - if (session == NULL) { - error = -ENOMEM; + /* consistency checks */ + if (ps->tunnel_sock != tunnel->sock) { + error = -EEXIST; goto end; } } else { - ps = l2tp_session_priv(session); - error = -EEXIST; - if (ps->sock != NULL) - goto end; + /* Default MTU must allow space for UDP/L2TP/PPP headers */ + cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + cfg.mru = cfg.mtu; - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, &cfg); + if (IS_ERR(session)) { + error = PTR_ERR(session); goto end; + } } /* Associate session with its PPPoL2TP socket */ @@ -777,6 +779,8 @@ out_no_ppp: session->name); end: + if (drop_refcnt) + l2tp_session_dec_refcount(session); release_sock(sk); return error; @@ -804,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i if (tunnel->sock == NULL) goto out; - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = l2tp_session_find(net, tunnel, session_id); - if (session != NULL) - goto out; - /* Default MTU values. */ if (cfg->mtu == 0) cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; @@ -817,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i cfg->mru = cfg->mtu; /* Allocate and initialize a new session context. */ - error = -ENOMEM; session = l2tp_session_create(sizeof(struct pppol2tp_session), tunnel, session_id, peer_session_id, cfg); - if (session == NULL) + if (IS_ERR(session)) { + error = PTR_ERR(session); goto out; + } ps = l2tp_session_priv(session); ps->tunnel_sock = tunnel->sock; @@ -1140,11 +1139,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, if (stats.session_id != 0) { /* resend to session ioctl handler */ struct l2tp_session *session = - l2tp_session_find(sock_net(sk), tunnel, stats.session_id); - if (session != NULL) - err = pppol2tp_session_ioctl(session, cmd, arg); - else + l2tp_session_get(sock_net(sk), tunnel, + stats.session_id, true); + + if (session) { + err = pppol2tp_session_ioctl(session, cmd, + arg); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } else { err = -EBADR; + } break; } #ifdef CONFIG_XFRM @@ -1377,8 +1383,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, } else err = pppol2tp_session_setsockopt(sk, session, optname, val); - err = 0; - end_put_sess: sock_put(sk); end: @@ -1501,8 +1505,13 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); sock_put(ps->tunnel_sock); - } else + if (err) + goto end_put_sess; + } else { err = pppol2tp_session_getsockopt(sk, session, optname, &val); + if (err) + goto end_put_sess; + } err = -EFAULT; if (put_user(len, optlen)) @@ -1554,7 +1563,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -1681,10 +1690,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) /* Show the tunnel or session context. */ - if (pd->session == NULL) + if (!pd->session) { pppol2tp_seq_tunnel_show(m, pd->tunnel); - else + } else { pppol2tp_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; @@ -1843,4 +1856,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); -MODULE_ALIAS_L2TP_PWTYPE(11); +MODULE_ALIAS_L2TP_PWTYPE(7); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 40813dd3301c..5bb0c5012819 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + local->ops->wake_tx_queue) { /* XXX: for AP_VLAN, actually track AP queues */ netif_tx_start_all_queues(dev); } else if (dev) { diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index da9df2d56e66..22fc32143e9c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4b2e1fb28bb4..d80073037856 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -57,7 +57,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, hlist_del_rcu(&exp->hnode); net->ct.expect_count--; - hlist_del(&exp->lnode); + hlist_del_rcu(&exp->lnode); master_help->expecting[exp->class]--; nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report); @@ -363,7 +363,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) /* two references : one for hash insert, one for the timer */ atomic_add(2, &exp->use); - hlist_add_head(&exp->lnode, &master_help->expectations); + hlist_add_head_rcu(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 02bcf00c2492..008299b7f78f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 6dc44d9b4190..4eeb3418366a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -158,16 +158,25 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; + rcu_read_lock(); + h = __nf_conntrack_helper_find(name, l3num, protonum); #ifdef CONFIG_MODULES if (h == NULL) { - if (request_module("nfct-helper-%s", name) == 0) + rcu_read_unlock(); + if (request_module("nfct-helper-%s", name) == 0) { + rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); + } else { + return h; + } } #endif if (h != NULL && !try_module_get(h->me)) h = NULL; + rcu_read_unlock(); + return h; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); @@ -311,38 +320,36 @@ void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); +/* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name) { struct nf_ct_helper_expectfn *cur; bool found = false; - rcu_read_lock(); list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (!strcmp(cur->name, name)) { found = true; break; } } - rcu_read_unlock(); return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); +/* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol) { struct nf_ct_helper_expectfn *cur; bool found = false; - rcu_read_lock(); list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (cur->expectfn == symbol) { found = true; break; } } - rcu_read_unlock(); return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6806b5e73567..dc7dfd68fafe 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1488,11 +1488,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct, * treat the second attempt as a no-op instead of returning * an error. */ - if (help && help->helper && - !strcmp(help->helper->name, helpname)) - return 0; - else - return -EBUSY; + err = -EBUSY; + if (help) { + rcu_read_lock(); + helper = rcu_dereference(help->helper); + if (helper && !strcmp(helper->name, helpname)) + err = 0; + rcu_read_unlock(); + } + + return err; } if (!strcmp(helpname, "")) { @@ -1929,9 +1934,9 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, err = 0; if (test_bit(IPS_EXPECTED_BIT, &ct->status)) - events = IPCT_RELATED; + events = 1 << IPCT_RELATED; else - events = IPCT_NEW; + events = 1 << IPCT_NEW; if (cda[CTA_LABELS] && ctnetlink_attach_labels(ct, cda) == 0) @@ -2675,8 +2680,8 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]], - hnode) { + hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]], + hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -2727,7 +2732,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conntrack_expect *)cb->args[1]; restart: - hlist_for_each_entry(exp, &help->expectations, lnode) { + hlist_for_each_entry_rcu(exp, &help->expectations, lnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; if (cb->args[1]) { @@ -2789,6 +2794,12 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); + /* No expectation linked to this connection tracking. */ + if (!nfct_help(ct)) { + nf_ct_put(ct); + return 0; + } + c.data = ct; err = netlink_dump_start(ctnl, skb, nlh, &c); @@ -3133,23 +3144,27 @@ ctnetlink_create_expect(struct net *net, return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); + rcu_read_lock(); if (cda[CTA_EXPECT_HELP_NAME]) { const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); helper = __nf_conntrack_helper_find(helpname, u3, nf_ct_protonum(ct)); if (helper == NULL) { + rcu_read_unlock(); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { err = -EOPNOTSUPP; goto err_ct; } + rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, u3, nf_ct_protonum(ct)); if (helper) { err = -EAGAIN; - goto err_ct; + goto err_rcu; } + rcu_read_unlock(); #endif err = -EOPNOTSUPP; goto err_ct; @@ -3159,11 +3174,13 @@ ctnetlink_create_expect(struct net *net, exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); - goto err_ct; + goto err_rcu; } err = nf_ct_expect_related_report(exp, portid, report); nf_ct_expect_put(exp); +err_rcu: + rcu_read_unlock(); err_ct: nf_ct_put(ct); return err; @@ -3442,6 +3459,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 94b14c5a8b17..82802e4a6640 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index d43869879fcf..86067560a318 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -101,11 +101,13 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, rcu_read_lock(); idev = __in6_dev_get(skb->dev); if (idev != NULL) { + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { newdst = ifa->addr; addr = true; break; } + read_unlock_bh(&idev->lock); } rcu_read_unlock(); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de8782345c86..d45558178da5 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,6 +32,13 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); +struct nfnl_cthelper { + struct list_head list; + struct nf_conntrack_helper helper; +}; + +static LIST_HEAD(nfnl_cthelper_list); + static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -161,6 +168,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set); @@ -170,19 +178,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; - helper->expect_class_max = - ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); - - if (helper->expect_class_max != 0 && - helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (class_max == 0) + return -EINVAL; + if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * - helper->expect_class_max, GFP_KERNEL); + class_max, GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; - for (i=0; i<helper->expect_class_max; i++) { + for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; @@ -191,6 +198,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (ret < 0) goto err; } + + helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: @@ -203,18 +212,20 @@ nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; + struct nfnl_cthelper *nfcth; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); - if (helper == NULL) + nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); + if (nfcth == NULL) return -ENOMEM; + helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err; + goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); @@ -245,15 +256,101 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nf_conntrack_helper_register(helper); if (ret < 0) - goto err; + goto err2; + list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; -err: - kfree(helper); +err2: + kfree(helper->expect_policy); +err1: + kfree(nfcth); return ret; } static int +nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, + struct nf_conntrack_expect_policy *new_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) + return -EBUSY; + + new_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + new_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], + struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *policy; + int i, err; + + /* Check first that all policy attributes are well-formed, so we don't + * leave things in inconsistent state on errors. + */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + + if (!tb[NFCTH_POLICY_SET + i]) + return -EINVAL; + + err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + &new_policy[i], + tb[NFCTH_POLICY_SET + i]); + if (err < 0) + return err; + } + /* Now we can safely update them. */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + policy = (struct nf_conntrack_expect_policy *) + &helper->expect_policy[i]; + policy->max_expected = new_policy->max_expected; + policy->timeout = new_policy->timeout; + } + + return 0; +} + +static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; + unsigned int class_max; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (helper->expect_class_max + 1 != class_max) + return -EBUSY; + + return nfnl_cthelper_update_policy_all(tb, helper); +} + +static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { @@ -263,8 +360,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return -EBUSY; if (tb[NFCTH_POLICY]) { - ret = nfnl_cthelper_parse_expect_policy(helper, - tb[NFCTH_POLICY]); + ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } @@ -293,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - int ret = 0, i; + struct nfnl_cthelper *nlcth; + int ret = 0; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -304,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, if (ret < 0) return ret; - rcu_read_lock(); - for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) - continue; + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if ((tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_EXCL) { - ret = -EEXIST; - goto err; - } - helper = cur; - break; - } + helper = cur; + break; } - rcu_read_unlock(); if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); @@ -336,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, ret = nfnl_cthelper_update(tb, helper); return ret; -err: - rcu_read_unlock(); - return ret; } static int @@ -377,10 +462,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, - htonl(helper->expect_class_max))) + htonl(helper->expect_class_max + 1))) goto nla_put_failure; - for (i=0; i<helper->expect_class_max; i++) { + for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET+i) | NLA_F_NESTED); if (nest_parms2 == NULL) @@ -502,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - int ret = -ENOENT, i; + int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; + struct nfnl_cthelper *nlcth; bool tuple_set = false; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -527,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; - - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { - ret = -ENOMEM; - break; - } + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), - NFNL_MSG_CTHELPER_NEW, cur); - if (ret <= 0) { - kfree_skb(skb2); - break; - } + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; - /* this avoids a loop in nfnetlink. */ - return ret == -EAGAIN ? -ENOBUFS : ret; - } + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; } return ret; } @@ -576,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; - int i, j = 0, ret; + struct nfnl_cthelper *nlcth, *n; + int j = 0, ret; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); @@ -592,28 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + j++; - j++; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - found = true; - nf_conntrack_helper_unregister(cur); - } + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + + list_del(&nlcth->list); + kfree(nlcth); } + /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : -ENOENT; } @@ -662,20 +741,16 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *tmp; - int i; + struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); - for (i=0; i<nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - nf_conntrack_helper_unregister(cur); - } + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(nlcth); } } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 139e0867e56e..47d6656c9119 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - rcu_barrier(); } module_init(cttimeout_init); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3ee0b8a000a4..933509ebf3d3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); - return NULL; + goto nlmsg_failure; } nlh = nlmsg_put(skb, 0, 0, @@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); - return NULL; + goto nlmsg_failure; } nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = entry->state.pf; @@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; + if (seclen) + security_release_secctx(secdata, seclen); return skb; nla_put_failure: skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); +nlmsg_failure: + if (seclen) + security_release_secctx(secdata, seclen); return NULL; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index eb2721af898d..c4dad1254ead 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -21,6 +21,7 @@ struct nft_hash { enum nft_registers sreg:8; enum nft_registers dreg:8; u8 len; + bool autogen_seed:1; u32 modulus; u32 seed; u32 offset; @@ -82,10 +83,12 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - if (tb[NFTA_HASH_SEED]) + if (tb[NFTA_HASH_SEED]) { priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); - else + } else { + priv->autogen_seed = true; get_random_bytes(&priv->seed, sizeof(priv->seed)); + } return nft_validate_register_load(priv->sreg, len) && nft_validate_register_store(ctx, priv->dreg, NULL, @@ -105,7 +108,8 @@ static int nft_hash_dump(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) + if (!priv->autogen_seed && + nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) goto nla_put_failure; if (priv->offset != 0) if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 27241a767f17..c64aca611ac5 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -104,7 +104,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); tcp_hdrlen = tcph->doff * 4; - if (len < tcp_hdrlen) + if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr)) return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { @@ -152,6 +152,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (len > tcp_hdrlen) return 0; + /* tcph->doff has 4 bits, do not wrap it to 0 */ + if (tcp_hdrlen >= 15 * 4) + return 0; + /* * MSS Option not found ?! add it.. */ diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 80cb7babeb64..df7f1df00330 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -393,7 +393,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, rcu_read_lock(); indev = __in6_dev_get(skb->dev); - if (indev) + if (indev) { + read_lock_bh(&indev->lock); list_for_each_entry(ifa, &indev->addr_list, if_list) { if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) continue; @@ -401,6 +402,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, laddr = &ifa->addr; break; } + read_unlock_bh(&indev->lock); + } rcu_read_unlock(); return laddr ? laddr : daddr; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e0a87776a010..7b2c2fce408a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net, */ if (nf_ct_is_confirmed(ct)) nf_ct_delete(ct, 0, 0); - else - nf_conntrack_put(&ct->ct_general); + + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, 0); return false; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d4bb8eb63f2..3f76cb765e5b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) /* Link layer. */ clear_vlan(key); - if (key->mac_proto == MAC_PROTO_NONE) { + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { if (unlikely(eth_type_vlan(skb->protocol))) return -EINVAL; @@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { - return key_extract(skb, key); + int res; + + res = key_extract(skb, key); + if (!res) + key->mac_proto &= ~SW_FLOW_KEY_INVALID; + + return res; } static int key_extract_mac_proto(struct sk_buff *skb) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a0dbe7ca8f72..8489beff5c25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + if (val > INT_MAX) + return -EINVAL; po->tp_reserve = val; return 0; } @@ -4193,8 +4195,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) @@ -4205,6 +4207,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; + if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b052b27a984e..1a2f9e964330 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -794,7 +794,7 @@ static void attach_default_qdiscs(struct net_device *dev) } } #ifdef CONFIG_NET_SCHED - if (dev->qdisc) + if (dev->qdisc != &noop_qdisc) qdisc_hash_add(dev->qdisc); #endif } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0439a1a68367..a9708da28eb5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -246,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; + if (sctp_stream_new(asoc, gfp)) + goto fail_init; + /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -264,7 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) - goto fail_init; + goto stream_free; asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; @@ -287,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; +stream_free: + sctp_stream_free(asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); @@ -1407,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc, /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1419,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, - SCTP_TRUNC4(dst_mtu(t->dst))); + sctp_transport_update_pmtu( + t, SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a28ab20487f..0e06a278d2a9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(sk, t, pmtu); + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index 1224421036b3..1409a875ad8e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; + struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - packet->vtag = vtag; - if (asoc && tp->dst) { - struct sock *sk = asoc->base.sk; - - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - - if (sk_can_gso(sk)) { - struct net_device *dev = tp->dst->dev; + /* do the following jobs only once for a flush schedule */ + if (!sctp_packet_empty(packet)) + return; - packet->max_size = dev->gso_max_size; - } else { - packet->max_size = asoc->pathmtu; - } - rcu_read_unlock(); + /* set packet max_size with pathmtu */ + packet->max_size = tp->pathmtu; + if (!asoc) + return; - } else { - packet->max_size = tp->pathmtu; + /* update dst or transport pathmtu if in need */ + sk = asoc->base.sk; + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + } else if (!sctp_transport_pmtu_check(tp)) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); } - if (ecn_capable && sctp_packet_empty(packet)) { - struct sctp_chunk *chunk; + /* If there a is a prepend chunk stick it on the list before + * any other chunks get appended. + */ + if (ecn_capable) { + struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); - /* If there a is a prepend chunk stick it on the list before - * any other chunks get appended. - */ - chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } + + if (!tp->dst) + return; + + /* set packet max_size with gso_max_size if gso is enabled*/ + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size + : asoc->pathmtu; + rcu_read_unlock(); } /* Initialize the packet structure. */ @@ -582,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* update dst if in need */ - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); - } + /* drop packet if no dst */ dst = dst_clone(tp->dst); if (!dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -704,7 +709,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && - !chunk->msg->force_delay) + !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 025ccff67072..8081476ed313 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1026,8 +1026,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= - asoc->c.sinit_num_ostreams) { + if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 206377fe91ec..a0b29d43627f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->hbinterval, assoc->stream->incnt, + assoc->stream->outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, atomic_read(&sk->sk_wmem_alloc), diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 969a30c7bb54..118faff6a332 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * association. */ if (!asoc->temp) { - int error; - - asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams, - asoc->c.sinit_num_ostreams, gfp); - if (!asoc->stream) + if (sctp_stream_init(asoc, gfp)) goto clean_up; - error = sctp_assoc_set_id(asoc, gfp); - if (error) + if (sctp_assoc_set_id(asoc, gfp)) goto clean_up; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e03bb1aab4d0..24c6ccce7539 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3946,7 +3946,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto discard_noforce; } @@ -4017,7 +4017,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto gen_shutdown; } @@ -6353,7 +6353,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->c.sinit_max_instreams) { + if (sid >= asoc->stream->incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f378ea2ae38..d9d4c92e06b3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(sk, asoc); + sctp_assoc_pending_pmtu(asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. */ - if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { + if (sinfo->sinfo_stream >= asoc->stream->outcnt) { err = -EINVAL; goto out_free; } @@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) err = PTR_ERR(datamsg); goto out_free; } - datamsg->force_delay = !!(msg->msg_flags & MSG_MORE); + asoc->force_delay = !!(msg->msg_flags & MSG_MORE); /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { @@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; } else { @@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } } else if (asoc) { asoc->param_flags = @@ -4461,8 +4461,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->c.sinit_max_instreams; - info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + info->sctpi_instrms = asoc->stream->incnt; + info->sctpi_outstrms = asoc->stream->outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4691,8 +4691,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->c.sinit_max_instreams; - status.sstat_outstrms = asoc->c.sinit_num_ostreams; + status.sstat_instrms = asoc->stream->incnt; + status.sstat_outstrms = asoc->stream->outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, @@ -7034,6 +7034,9 @@ int sctp_inet_listen(struct socket *sock, int backlog) if (sock->state != SS_UNCONNECTED) goto out; + if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED)) + goto out; + /* If backlog is zero, disable listening. */ if (!backlog) { if (sctp_sstate(sk, CLOSED)) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 1c6cc04fa3a4..bbed997e1c5f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,33 +35,60 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) { struct sctp_stream *stream; int i; stream = kzalloc(sizeof(*stream), gfp); if (!stream) - return NULL; + return -ENOMEM; - stream->outcnt = outcnt; + stream->outcnt = asoc->c.sinit_num_ostreams; stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); if (!stream->out) { kfree(stream); - return NULL; + return -ENOMEM; } for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; - stream->incnt = incnt; + asoc->stream = stream; + + return 0; +} + +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_stream *stream = asoc->stream; + int i; + + /* Initial stream->out size may be very big, so free it and alloc + * a new one with new outcnt to save memory. + */ + kfree(stream->out); + stream->outcnt = asoc->c.sinit_num_ostreams; + stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + if (!stream->out) + goto nomem; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + stream->incnt = asoc->c.sinit_max_instreams; stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); if (!stream->in) { kfree(stream->out); - kfree(stream); - return NULL; + goto nomem; } - return stream; + return 0; + +nomem: + asoc->stream = NULL; + kfree(stream); + + return -ENOMEM; } void sctp_stream_free(struct sctp_stream *stream) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3379668af368..721eeebfcd8a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { - struct dst_entry *dst; + struct dst_entry *dst = sctp_transport_dst_check(t); if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ @@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->pathmtu = pmtu; } - dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); - if (dst) { - dst->ops->update_pmtu(dst, sk, NULL, pmtu); - + dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); } + + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); } /* Caches the dst entry and source address for a transport's destination diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8931e33b6541..2b720fa35c4f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1635,6 +1635,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, xprt = &svsk->sk_xprt; svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c13a5c35ce14..fc8f14c7bfec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -127,6 +127,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, xprt = &cma_xprt->sc_xprt; svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &xprt->xpt_flags); serv->sv_bc_xprt = xprt; dprintk("svcrdma: %s(%p)\n", __func__, xprt); diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 16b6b5988be9..570a2b67ca10 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev) /* Age scan results with time spent in suspend */ cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - if (rdev->ops->resume) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_resume(rdev); - rtnl_unlock(); - } + rtnl_lock(); + if (rdev->wiphy.registered && rdev->ops->resume) + ret = rdev_resume(rdev); + rtnl_unlock(); return ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 9705c279494b..40a8aa39220d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) + return -EINVAL; + + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) return -EINVAL; return 0; |