diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/netfilter/ebtables.c | 11 | ||||
-rw-r--r-- | net/ife/ife.c | 38 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 7 | ||||
-rw-r--r-- | net/ipv6/netfilter/Kconfig | 55 | ||||
-rw-r--r-- | net/ipv6/route.c | 2 | ||||
-rw-r--r-- | net/ipv6/seg6_iptunnel.c | 2 | ||||
-rw-r--r-- | net/l2tp/l2tp_debugfs.c | 5 | ||||
-rw-r--r-- | net/l2tp/l2tp_ppp.c | 12 | ||||
-rw-r--r-- | net/llc/af_llc.c | 21 | ||||
-rw-r--r-- | net/llc/llc_c_ac.c | 9 | ||||
-rw-r--r-- | net/llc/llc_conn.c | 22 | ||||
-rw-r--r-- | net/netfilter/Kconfig | 1 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 8 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c | 155 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_expect.c | 5 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_extend.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_sip.c | 16 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 69 | ||||
-rw-r--r-- | net/netfilter/xt_connmark.c | 49 | ||||
-rw-r--r-- | net/packet/af_packet.c | 60 | ||||
-rw-r--r-- | net/packet/internal.h | 10 | ||||
-rw-r--r-- | net/sched/act_ife.c | 9 | ||||
-rw-r--r-- | net/strparser/strparser.c | 2 |
23 files changed, 347 insertions, 223 deletions
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 032e0fe45940..28a4c3490359 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1825,13 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info, { unsigned int size = info->entries_size; const void *entries = info->entries; - int ret; newinfo->entries_size = size; - - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); - if (ret) - return ret; + if (info->nentries) { + int ret = xt_compat_init_offsets(NFPROTO_BRIDGE, + info->nentries); + if (ret) + return ret; + } return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); diff --git a/net/ife/ife.c b/net/ife/ife.c index 7d1ec76e7f43..13bbf8cb6a39 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -69,6 +69,9 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) int total_pull; u16 ifehdrln; + if (!pskb_may_pull(skb, skb->dev->hard_header_len + IFE_METAHDRLEN)) + return NULL; + ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len); ifehdrln = ntohs(ifehdr->metalen); total_pull = skb->dev->hard_header_len + ifehdrln; @@ -92,12 +95,43 @@ struct meta_tlvhdr { __be16 len; }; +static bool __ife_tlv_meta_valid(const unsigned char *skbdata, + const unsigned char *ifehdr_end) +{ + const struct meta_tlvhdr *tlv; + u16 tlvlen; + + if (unlikely(skbdata + sizeof(*tlv) > ifehdr_end)) + return false; + + tlv = (const struct meta_tlvhdr *)skbdata; + tlvlen = ntohs(tlv->len); + + /* tlv length field is inc header, check on minimum */ + if (tlvlen < NLA_HDRLEN) + return false; + + /* overflow by NLA_ALIGN check */ + if (NLA_ALIGN(tlvlen) < tlvlen) + return false; + + if (unlikely(skbdata + NLA_ALIGN(tlvlen) > ifehdr_end)) + return false; + + return true; +} + /* Caller takes care of presenting data in network order */ -void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen) +void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype, + u16 *dlen, u16 *totlen) { - struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata; + struct meta_tlvhdr *tlv; + + if (!__ife_tlv_meta_valid(skbdata, ifehdr_end)) + return NULL; + tlv = (struct meta_tlvhdr *)skbdata; *dlen = ntohs(tlv->len) - NLA_HDRLEN; *attrtype = ntohs(tlv->type); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5a17cfc75326..8acbe5fd2098 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3889,11 +3889,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); - /* If the TCP option is too short, we can short cut */ - if (length < TCPOLEN_MD5SIG) - return NULL; - - while (length > 0) { + /* If not enough data remaining, we can short cut */ + while (length >= TCPOLEN_MD5SIG) { int opcode = *ptr++; int opsize; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ccbfa83e4bb0..ce77bcc2490c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -48,6 +48,34 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. +if NF_NAT_IPV6 + +config NFT_CHAIN_NAT_IPV6 + tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. + +config NFT_MASQ_IPV6 + tristate "IPv6 masquerade support for nf_tables" + depends on NFT_MASQ + select NF_NAT_MASQUERADE_IPV6 + help + This is the expression that provides IPv4 masquerading support for + nf_tables. + +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NFT_REDIR + select NF_NAT_REDIRECT + help + This is the expression that provides IPv4 redirect support for + nf_tables. + +endif # NF_NAT_IPV6 + config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT @@ -107,39 +135,12 @@ config NF_NAT_IPV6 if NF_NAT_IPV6 -config NFT_CHAIN_NAT_IPV6 - depends on NF_TABLES_IPV6 - tristate "IPv6 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv6 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. - config NF_NAT_MASQUERADE_IPV6 tristate "IPv6 masquerade support" help This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. -config NFT_MASQ_IPV6 - tristate "IPv6 masquerade support for nf_tables" - depends on NF_TABLES_IPV6 - depends on NFT_MASQ - select NF_NAT_MASQUERADE_IPV6 - help - This is the expression that provides IPv4 masquerading support for - nf_tables. - -config NFT_REDIR_IPV6 - tristate "IPv6 redirect support for nf_tables" - depends on NF_TABLES_IPV6 - depends on NFT_REDIR - select NF_NAT_REDIRECT - help - This is the expression that provides IPv4 redirect support for - nf_tables. - endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 432c4bcc1111..7ee0a34fba46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4053,6 +4053,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, + [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, @@ -4064,6 +4065,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_EXPIRES] = { .type = NLA_U32 }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index f343e6f0fc95..5fe139484919 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -136,7 +136,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index b8f9d45bfeb1..7f1e842ef05a 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -106,8 +106,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v) return; /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */ - if (pd->tunnel) + if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); + pd->tunnel = NULL; + pd->session = NULL; + } } static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7d0c963680e6..1fd9e145076a 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -619,6 +619,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); error = -EINVAL; + + if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6)) + goto end; + if (sp->sa_protocol != PX_PROTO_OL2TP) goto end; @@ -1618,8 +1625,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v) return; /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */ - if (pd->tunnel) + if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); + pd->tunnel = NULL; + pd->session = NULL; + } } static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6d29b2b94e84..cb80ebb38311 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -189,7 +189,6 @@ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; - struct llc_sap *sap; if (unlikely(sk == NULL)) goto out; @@ -200,15 +199,19 @@ static int llc_ui_release(struct socket *sock) llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); - sap = llc->sap; - /* Hold this for release_sock(), so that llc_backlog_rcv() could still - * use it. - */ - llc_sap_hold(sap); - if (!sock_flag(sk, SOCK_ZAPPED)) + if (!sock_flag(sk, SOCK_ZAPPED)) { + struct llc_sap *sap = llc->sap; + + /* Hold this for release_sock(), so that llc_backlog_rcv() + * could still use it. + */ + llc_sap_hold(sap); llc_sap_remove_socket(llc->sap, sk); - release_sock(sk); - llc_sap_put(sap); + release_sock(sk); + llc_sap_put(sap); + } else { + release_sock(sk); + } if (llc->dev) dev_put(llc->dev); sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 163121192aca..4d78375f9872 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1099,14 +1099,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb) { - struct llc_sock *llc = llc_sk(sk); - - del_timer(&llc->pf_cycle_timer.timer); - del_timer(&llc->ack_timer.timer); - del_timer(&llc->rej_sent_timer.timer); - del_timer(&llc->busy_state_timer.timer); - llc->ack_must_be_send = 0; - llc->ack_pf = 0; + llc_sk_stop_all_timers(sk, false); return 0; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 110e32bcb399..c0ac522b48a1 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -961,6 +961,26 @@ out: return sk; } +void llc_sk_stop_all_timers(struct sock *sk, bool sync) +{ + struct llc_sock *llc = llc_sk(sk); + + if (sync) { + del_timer_sync(&llc->pf_cycle_timer.timer); + del_timer_sync(&llc->ack_timer.timer); + del_timer_sync(&llc->rej_sent_timer.timer); + del_timer_sync(&llc->busy_state_timer.timer); + } else { + del_timer(&llc->pf_cycle_timer.timer); + del_timer(&llc->ack_timer.timer); + del_timer(&llc->rej_sent_timer.timer); + del_timer(&llc->busy_state_timer.timer); + } + + llc->ack_must_be_send = 0; + llc->ack_pf = 0; +} + /** * llc_sk_free - Frees a LLC socket * @sk - socket to free @@ -973,7 +993,7 @@ void llc_sk_free(struct sock *sk) llc->state = LLC_CONN_OUT_OF_SVC; /* Stop all (possibly) running timers */ - llc_conn_ac_stop_all_timers(sk, NULL); + llc_sk_stop_all_timers(sk, true); #ifdef DEBUG_LLC_CONN_ALLOC printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__, skb_queue_len(&llc->pdu_unack_q), diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 704b3832dbad..44d8a55e9721 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -594,6 +594,7 @@ config NFT_QUOTA config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" + depends on !NF_TABLES_INET || (IPV6!=m || m) help This option adds the "reject" expression that you can use to explicitly deny and notify via TCP reset/ICMP informational errors diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5ebde4b15810..f36098887ad0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2384,11 +2384,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) strlcpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)); cfg.syncid = dm->syncid; - rtnl_lock(); - mutex_lock(&ipvs->sync_mutex); ret = start_sync_thread(ipvs, &cfg, dm->state); - mutex_unlock(&ipvs->sync_mutex); - rtnl_unlock(); } else { mutex_lock(&ipvs->sync_mutex); ret = stop_sync_thread(ipvs, dm->state); @@ -3481,12 +3477,8 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) if (ipvs->mixed_address_family_dests > 0) return -EINVAL; - rtnl_lock(); - mutex_lock(&ipvs->sync_mutex); ret = start_sync_thread(ipvs, &c, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); - mutex_unlock(&ipvs->sync_mutex); - rtnl_unlock(); return ret; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index fbaf3bd05b2e..001501e25625 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -49,6 +49,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ @@ -1360,15 +1361,9 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) /* * Specifiy default interface for outgoing multicasts */ -static int set_mcast_if(struct sock *sk, char *ifname) +static int set_mcast_if(struct sock *sk, struct net_device *dev) { - struct net_device *dev; struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1396,19 +1391,14 @@ static int set_mcast_if(struct sock *sk, char *ifname) * in the in_addr structure passed in as a parameter. */ static int -join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) +join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev) { - struct net *net = sock_net(sk); struct ip_mreqn mreq; - struct net_device *dev; int ret; memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1423,15 +1413,10 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) #ifdef CONFIG_IP_VS_IPV6 static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, - char *ifname) + struct net_device *dev) { - struct net *net = sock_net(sk); - struct net_device *dev; int ret; - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1443,24 +1428,18 @@ static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, } #endif -static int bind_mcastif_addr(struct socket *sock, char *ifname) +static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) { - struct net *net = sock_net(sock->sk); - struct net_device *dev; __be32 addr; struct sockaddr_in sin; - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; - addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); if (!addr) pr_err("You probably need to specify IP address on " "multicast interface.\n"); IP_VS_DBG(7, "binding socket with (%s) %pI4\n", - ifname, &addr); + dev->name, &addr); /* Now bind the socket with the address of multicast interface */ sin.sin_family = AF_INET; @@ -1493,7 +1472,8 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, /* * Set up sending multicast socket over UDP */ -static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) +static int make_send_sock(struct netns_ipvs *ipvs, int id, + struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; @@ -1505,9 +1485,10 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); - return ERR_PTR(result); + goto error; } - result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn); + *sock_ret = sock; + result = set_mcast_if(sock->sk, dev); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; @@ -1522,7 +1503,7 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) set_sock_size(sock->sk, 1, result); if (AF_INET == ipvs->mcfg.mcast_af) - result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn); + result = bind_mcastif_addr(sock, dev); else result = 0; if (result < 0) { @@ -1538,19 +1519,18 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) goto error; } - return sock; + return 0; error: - sock_release(sock); - return ERR_PTR(result); + return result; } /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, - int ifindex) +static int make_receive_sock(struct netns_ipvs *ipvs, int id, + struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; @@ -1562,8 +1542,9 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); - return ERR_PTR(result); + goto error; } + *sock_ret = sock; /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); @@ -1571,7 +1552,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, set_sock_size(sock->sk, 0, result); get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); - sock->sk->sk_bound_dev_if = ifindex; + sock->sk->sk_bound_dev_if = dev->ifindex; result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); @@ -1582,21 +1563,20 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, #ifdef CONFIG_IP_VS_IPV6 if (ipvs->bcfg.mcast_af == AF_INET6) result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, - ipvs->bcfg.mcast_ifn); + dev); else #endif result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, - ipvs->bcfg.mcast_ifn); + dev); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; } - return sock; + return 0; error: - sock_release(sock); - return ERR_PTR(result); + return result; } @@ -1778,13 +1758,12 @@ static int sync_thread_backup(void *data) int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { - struct ip_vs_sync_thread_data *tinfo; + struct ip_vs_sync_thread_data *tinfo = NULL; struct task_struct **array = NULL, *task; - struct socket *sock; struct net_device *dev; char *name; int (*threadfn)(void *data); - int id, count, hlen; + int id = 0, count, hlen; int result = -ENOMEM; u16 mtu, min_mtu; @@ -1792,6 +1771,18 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* Do not hold one mutex and then to block on another */ + for (;;) { + rtnl_lock(); + if (mutex_trylock(&ipvs->sync_mutex)) + break; + rtnl_unlock(); + mutex_lock(&ipvs->sync_mutex); + if (rtnl_trylock()) + break; + mutex_unlock(&ipvs->sync_mutex); + } + if (!ipvs->sync_state) { count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); ipvs->threads_mask = count - 1; @@ -1810,7 +1801,8 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); - return -ENODEV; + result = -ENODEV; + goto out_early; } hlen = (AF_INET6 == c->mcast_af) ? sizeof(struct ipv6hdr) + sizeof(struct udphdr) : @@ -1827,26 +1819,30 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, c->sync_maxlen = mtu - hlen; if (state == IP_VS_STATE_MASTER) { + result = -EEXIST; if (ipvs->ms) - return -EEXIST; + goto out_early; ipvs->mcfg = *c; name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { + result = -EEXIST; if (ipvs->backup_threads) - return -EEXIST; + goto out_early; ipvs->bcfg = *c; name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; } else { - return -EINVAL; + result = -EINVAL; + goto out_early; } if (state == IP_VS_STATE_MASTER) { struct ipvs_master_sync_state *ms; + result = -ENOMEM; ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL); if (!ipvs->ms) goto out; @@ -1862,39 +1858,38 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, } else { array = kcalloc(count, sizeof(struct task_struct *), GFP_KERNEL); + result = -ENOMEM; if (!array) goto out; } - tinfo = NULL; for (id = 0; id < count; id++) { - if (state == IP_VS_STATE_MASTER) - sock = make_send_sock(ipvs, id); - else - sock = make_receive_sock(ipvs, id, dev->ifindex); - if (IS_ERR(sock)) { - result = PTR_ERR(sock); - goto outtinfo; - } + result = -ENOMEM; tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) - goto outsocket; + goto out; tinfo->ipvs = ipvs; - tinfo->sock = sock; + tinfo->sock = NULL; if (state == IP_VS_STATE_BACKUP) { tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) - goto outtinfo; + goto out; } else { tinfo->buf = NULL; } tinfo->id = id; + if (state == IP_VS_STATE_MASTER) + result = make_send_sock(ipvs, id, dev, &tinfo->sock); + else + result = make_receive_sock(ipvs, id, dev, &tinfo->sock); + if (result < 0) + goto out; task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); if (IS_ERR(task)) { result = PTR_ERR(task); - goto outtinfo; + goto out; } tinfo = NULL; if (state == IP_VS_STATE_MASTER) @@ -1911,20 +1906,20 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, ipvs->sync_state |= state; spin_unlock_bh(&ipvs->sync_buff_lock); + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); + /* increase the module use count */ ip_vs_use_count_inc(); return 0; -outsocket: - sock_release(sock); - -outtinfo: - if (tinfo) { - sock_release(tinfo->sock); - kfree(tinfo->buf); - kfree(tinfo); - } +out: + /* We do not need RTNL lock anymore, release it here so that + * sock_release below and in the kthreads can use rtnl_lock + * to leave the mcast group. + */ + rtnl_unlock(); count = id; while (count-- > 0) { if (state == IP_VS_STATE_MASTER) @@ -1932,13 +1927,23 @@ outtinfo: else kthread_stop(array[count]); } - kfree(array); - -out: if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { kfree(ipvs->ms); ipvs->ms = NULL; } + mutex_unlock(&ipvs->sync_mutex); + if (tinfo) { + if (tinfo->sock) + sock_release(tinfo->sock); + kfree(tinfo->buf); + kfree(tinfo); + } + kfree(array); + return result; + +out_early: + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); return result; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 8ef21d9f9a00..4b2b3d53acfc 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -252,7 +252,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { - return a->master == b->master && a->class == b->class && + return a->master == b->master && nf_ct_tuple_equal(&a->tuple, &b->tuple) && nf_ct_tuple_mask_equal(&a->mask, &b->mask) && net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && @@ -421,6 +421,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) h = nf_ct_expect_dst_hash(net, &expect->tuple); hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { + if (i->class != expect->class) + return -EALREADY; + if (nf_ct_remove_expect(i)) break; } else if (expect_clash(i, expect)) { diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 9fe0ddc333fb..277bbfe26478 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> +#include <linux/kmemleak.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/rcupdate.h> @@ -71,6 +72,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) rcu_read_unlock(); alloc = max(newlen, NF_CT_EXT_PREALLOC); + kmemleak_not_leak(old); new = __krealloc(old, alloc, gfp); if (!new) return NULL; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4dbb5bad4363..908e51e2dc2b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -938,11 +938,19 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { - if (nf_ct_expect_related(rtp_exp) == 0) { - if (nf_ct_expect_related(rtcp_exp) != 0) - nf_ct_unexpect_related(rtp_exp); - else + /* -EALREADY handling works around end-points that send + * SDP messages with identical port but different media type, + * we pretend expectation was set up. + */ + int errp = nf_ct_expect_related(rtp_exp); + + if (errp == 0 || errp == -EALREADY) { + int errcp = nf_ct_expect_related(rtcp_exp); + + if (errcp == 0 || errcp == -EALREADY) ret = NF_ACCEPT; + else if (errp == 0) + nf_ct_unexpect_related(rtp_exp); } } nf_ct_expect_put(rtcp_exp); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9134cc429ad4..04d4e3772584 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2361,41 +2361,46 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (nft_is_active_next(net, old_rule)) { - trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, - old_rule); - if (trans == NULL) { - err = -ENOMEM; - goto err2; - } - nft_deactivate_next(net, old_rule); - chain->use--; - list_add_tail_rcu(&rule->list, &old_rule->list); - } else { + if (!nft_is_active_next(net, old_rule)) { err = -ENOENT; goto err2; } - } else if (nlh->nlmsg_flags & NLM_F_APPEND) - if (old_rule) - list_add_rcu(&rule->list, &old_rule->list); - else - list_add_tail_rcu(&rule->list, &chain->rules); - else { - if (old_rule) - list_add_tail_rcu(&rule->list, &old_rule->list); - else - list_add_rcu(&rule->list, &chain->rules); - } + trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, + old_rule); + if (trans == NULL) { + err = -ENOMEM; + goto err2; + } + nft_deactivate_next(net, old_rule); + chain->use--; - if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { - err = -ENOMEM; - goto err3; + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + list_add_tail_rcu(&rule->list, &old_rule->list); + } else { + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_APPEND) { + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + } else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } } chain->use++; return 0; -err3: - list_del_rcu(&rule->list); err2: nf_tables_rule_destroy(&ctx, rule); err1: @@ -3207,18 +3212,20 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err = ops->init(set, &desc, nla); if (err < 0) - goto err2; + goto err3; err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err3; + goto err4; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; -err3: +err4: ops->destroy(set); +err3: + kfree(set->name); err2: kvfree(set); err1: @@ -5738,7 +5745,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) struct nft_base_chain *basechain; if (nft_trans_chain_name(trans)) - strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); + swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); if (!nft_is_base_chain(trans->ctx.chain)) return; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 773da82190dc..94df000abb92 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -36,11 +36,10 @@ MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static unsigned int -connmark_tg_shift(struct sk_buff *skb, - const struct xt_connmark_tginfo1 *info, - u8 shift_bits, u8 shift_dir) +connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) { enum ip_conntrack_info ctinfo; + u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; @@ -51,34 +50,39 @@ connmark_tg_shift(struct sk_buff *skb, switch (info->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; - if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + if (info->shift_dir == D_SHIFT_RIGHT) + newmark >>= info->shift_bits; else - newmark <<= shift_bits; + newmark <<= info->shift_bits; + if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~info->ctmask) ^ - (skb->mark & info->nfmask); - if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + new_targetmark = (skb->mark & info->nfmask); + if (info->shift_dir == D_SHIFT_RIGHT) + new_targetmark >>= info->shift_bits; else - newmark <<= shift_bits; + new_targetmark <<= info->shift_bits; + + newmark = (ct->mark & ~info->ctmask) ^ + new_targetmark; if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - newmark = (skb->mark & ~info->nfmask) ^ - (ct->mark & info->ctmask); - if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + new_targetmark = (ct->mark & info->ctmask); + if (info->shift_dir == D_SHIFT_RIGHT) + new_targetmark >>= info->shift_bits; else - newmark <<= shift_bits; + new_targetmark <<= info->shift_bits; + + newmark = (skb->mark & ~info->nfmask) ^ + new_targetmark; skb->mark = newmark; break; } @@ -89,8 +93,14 @@ static unsigned int connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo1 *info = par->targinfo; - - return connmark_tg_shift(skb, info, 0, 0); + const struct xt_connmark_tginfo2 info2 = { + .ctmark = info->ctmark, + .ctmask = info->ctmask, + .nfmask = info->nfmask, + .mode = info->mode, + }; + + return connmark_tg_shift(skb, &info2); } static unsigned int @@ -98,8 +108,7 @@ connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo2 *info = par->targinfo; - return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info, - info->shift_bits, info->shift_dir); + return connmark_tg_shift(skb, info); } static int connmark_tg_check(const struct xt_tgchk_param *par) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c31b0687396a..01f3515cada0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -329,11 +329,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) skb_set_queue_mapping(skb, queue_index); } -/* register_prot_hook must be invoked with the po->bind_lock held, +/* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ -static void register_prot_hook(struct sock *sk) +static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); @@ -348,8 +348,13 @@ static void register_prot_hook(struct sock *sk) } } -/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock - * held. If the sync parameter is true, we will temporarily drop +static void register_prot_hook(struct sock *sk) +{ + lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); + __register_prot_hook(sk); +} + +/* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the @@ -359,6 +364,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); + lockdep_assert_held_once(&po->bind_lock); + po->running = 0; if (po->fanout) @@ -3252,7 +3259,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (proto) { po->prot_hook.type = proto; - register_prot_hook(sk); + __register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); @@ -3732,12 +3739,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_loss = !!val; - return 0; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_loss = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_AUXDATA: { @@ -3748,7 +3761,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->auxdata = !!val; + release_sock(sk); return 0; } case PACKET_ORIGDEV: @@ -3760,7 +3775,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->origdev = !!val; + release_sock(sk); return 0; } case PACKET_VNET_HDR: @@ -3769,15 +3786,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (sock->type != SOCK_RAW) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->has_vnet_hdr = !!val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->has_vnet_hdr = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_TIMESTAMP: { @@ -3815,11 +3837,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tx_has_off = !!val; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_tx_has_off = !!val; + ret = 0; + } + release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: diff --git a/net/packet/internal.h b/net/packet/internal.h index a1d2b2319ae9..3bb7c5fb3bff 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -112,10 +112,12 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, + unsigned int running; /* bind_lock must be held */ + unsigned int auxdata:1, /* writer must hold sock lock */ origdev:1, - has_vnet_hdr:1; + has_vnet_hdr:1, + tp_loss:1, + tp_tx_has_off:1; int pressure; int ifindex; /* bound device */ __be16 num; @@ -125,8 +127,6 @@ struct packet_sock { enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a5994cf0512b..8527cfdc446d 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -652,7 +652,7 @@ static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, } } - return 0; + return -ENOENT; } static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, @@ -682,7 +682,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u16 mtype; u16 dlen; - curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL); + curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype, + &dlen, NULL); + if (!curr_data) { + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); + return TC_ACT_SHOT; + } if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 805b139756db..092bebc70048 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -67,7 +67,7 @@ static void strp_abort_strp(struct strparser *strp, int err) static void strp_start_timer(struct strparser *strp, long timeo) { - if (timeo) + if (timeo && timeo != LONG_MAX) mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo); } |