summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/hard-interface.c1
-rw-r--r--net/batman-adv/tp_meter.c1
-rw-r--r--net/bluetooth/6lowpan.c4
-rw-r--r--net/bluetooth/hci_conn.c26
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/can/bcm.c50
-rw-r--r--net/ceph/ceph_fs.c3
-rw-r--r--net/ceph/osd_client.c1
-rw-r--r--net/core/dev.c19
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/filter.c68
-rw-r--r--net/core/flow.c6
-rw-r--r--net/core/flow_dissector.c13
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/rtnetlink.c29
-rw-r--r--net/core/sock.c10
-rw-r--r--net/dccp/ipv4.c28
-rw-r--r--net/dccp/ipv6.c19
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/dsa/dsa.c13
-rw-r--r--net/dsa/dsa2.c4
-rw-r--r--net/dsa/slave.c19
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/fib_frontend.c20
-rw-r--r--net/ipv4/fib_trie.c90
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/igmp.c50
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c27
-rw-r--r--net/ipv4/ip_tunnel_core.c11
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter.c5
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c6
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_cong.c4
-rw-r--r--net/ipv4/tcp_dctcp.c13
-rw-r--r--net/ipv4/tcp_ipv4.c19
-rw-r--r--net/ipv4/udp.c8
-rw-r--r--net/ipv4/udp_impl.h2
-rw-r--r--net/ipv4/udplite.c2
-rw-r--r--net/ipv6/addrconf.c18
-rw-r--r--net/ipv6/datagram.c4
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c14
-rw-r--r--net/ipv6/ip6_udp_tunnel.c3
-rw-r--r--net/ipv6/ip6_vti.c31
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c2
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c6
-rw-r--r--net/ipv6/output_core.c2
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/tcp_ipv6.c14
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udplite.c2
-rw-r--r--net/l2tp/l2tp_ip.c66
-rw-r--r--net/l2tp/l2tp_ip6.c84
-rw-r--r--net/mac80211/sta_info.c2
-rw-r--r--net/mac80211/tx.c14
-rw-r--r--net/mac80211/vht.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c7
-rw-r--r--net/netfilter/nf_conntrack_core.c49
-rw-r--r--net/netfilter/nf_conntrack_helper.c11
-rw-r--r--net/netfilter/nf_conntrack_sip.c5
-rw-r--r--net/netfilter/nf_nat_core.c49
-rw-r--r--net/netfilter/nf_tables_api.c32
-rw-r--r--net/netfilter/nft_dynset.c19
-rw-r--r--net/netfilter/nft_hash.c7
-rw-r--r--net/netfilter/nft_range.c6
-rw-r--r--net/netfilter/nft_set_hash.c19
-rw-r--r--net/netfilter/nft_set_rbtree.c2
-rw-r--r--net/netfilter/xt_connmark.c4
-rw-r--r--net/netlink/af_netlink.c27
-rw-r--r--net/netlink/af_netlink.h2
-rw-r--r--net/netlink/diag.c5
-rw-r--r--net/netlink/genetlink.c4
-rw-r--r--net/openvswitch/conntrack.c5
-rw-r--r--net/packet/af_packet.c18
-rw-r--r--net/rds/tcp.c2
-rw-r--r--net/sched/act_pedit.c24
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_basic.c4
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/sched/cls_cgroup.c7
-rw-r--r--net/sched/cls_flow.c1
-rw-r--r--net/sched/cls_flower.c41
-rw-r--r--net/sched/cls_matchall.c1
-rw-r--r--net/sched/cls_rsvp.h3
-rw-r--r--net/sched/cls_tcindex.c1
-rw-r--r--net/sctp/input.c35
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/socket.c27
-rw-r--r--net/socket.c17
-rw-r--r--net/sunrpc/clnt.c7
-rw-r--r--net/sunrpc/svc_xprt.c11
-rw-r--r--net/sunrpc/svcsock.c21
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c37
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c6
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h3
-rw-r--r--net/tipc/bearer.c11
-rw-r--r--net/tipc/bearer.h13
-rw-r--r--net/tipc/link.c40
-rw-r--r--net/tipc/monitor.c10
-rw-r--r--net/tipc/socket.c50
-rw-r--r--net/tipc/udp_media.c5
-rw-r--r--net/unix/af_unix.c20
-rw-r--r--net/wireless/core.h1
-rw-r--r--net/wireless/scan.c69
-rw-r--r--net/wireless/util.c3
-rw-r--r--net/xfrm/xfrm_policy.c10
-rw-r--r--net/xfrm/xfrm_user.c2
122 files changed, 1147 insertions, 544 deletions
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e034afbd1bb0..08ce36147c4c 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -652,6 +652,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_softif_destroy_sysfs(hard_iface->soft_iface);
}
+ hard_iface->soft_iface = NULL;
batadv_hardif_put(hard_iface);
out:
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 2333777f919d..8af1611b8ab2 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -837,6 +837,7 @@ static int batadv_tp_send(void *arg)
primary_if = batadv_primary_if_get_selected(bat_priv);
if (unlikely(!primary_if)) {
err = BATADV_TP_REASON_DST_UNREACHABLE;
+ tp_vars->reason = err;
goto out;
}
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d020299baba4..1904a93f47d5 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -1090,7 +1090,6 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
{
struct hci_conn *hcon;
struct hci_dev *hdev;
- bdaddr_t *src = BDADDR_ANY;
int n;
n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu",
@@ -1101,7 +1100,8 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
if (n < 7)
return -EINVAL;
- hdev = hci_get_route(addr, src);
+ /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */
+ hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC);
if (!hdev)
return -ENOENT;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 3809617aa98d..dc59eae54717 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -613,7 +613,7 @@ int hci_conn_del(struct hci_conn *conn)
return 0;
}
-struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
+struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
{
int use_src = bacmp(src, BDADDR_ANY);
struct hci_dev *hdev = NULL, *d;
@@ -634,7 +634,29 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
*/
if (use_src) {
- if (!bacmp(&d->bdaddr, src)) {
+ bdaddr_t id_addr;
+ u8 id_addr_type;
+
+ if (src_type == BDADDR_BREDR) {
+ if (!lmp_bredr_capable(d))
+ continue;
+ bacpy(&id_addr, &d->bdaddr);
+ id_addr_type = BDADDR_BREDR;
+ } else {
+ if (!lmp_le_capable(d))
+ continue;
+
+ hci_copy_identity_address(d, &id_addr,
+ &id_addr_type);
+
+ /* Convert from HCI to three-value type */
+ if (id_addr_type == ADDR_LE_DEV_PUBLIC)
+ id_addr_type = BDADDR_LE_PUBLIC;
+ else
+ id_addr_type = BDADDR_LE_RANDOM;
+ }
+
+ if (!bacmp(&id_addr, src) && id_addr_type == src_type) {
hdev = d; break;
}
} else {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index d4cad29b033f..577f1c01454a 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7060,7 +7060,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst,
dst_type, __le16_to_cpu(psm));
- hdev = hci_get_route(dst, &chan->src);
+ hdev = hci_get_route(dst, &chan->src, chan->src_type);
if (!hdev)
return -EHOSTUNREACH;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 8e385a0ae60e..2f2cb5e27cdd 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -178,7 +178,7 @@ static void rfcomm_reparent_device(struct rfcomm_dev *dev)
struct hci_dev *hdev;
struct hci_conn *conn;
- hdev = hci_get_route(&dev->dst, &dev->src);
+ hdev = hci_get_route(&dev->dst, &dev->src, BDADDR_BREDR);
if (!hdev)
return;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index f52bcbf2e58c..3125ce670c2f 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -219,7 +219,7 @@ static int sco_connect(struct sock *sk)
BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
- hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src);
+ hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
if (!hdev)
return -EHOSTUNREACH;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 8e999ffdf28b..436a7537e6a9 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -77,7 +77,7 @@
(CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
(CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
-#define CAN_BCM_VERSION "20160617"
+#define CAN_BCM_VERSION "20161123"
MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
MODULE_LICENSE("Dual BSD/GPL");
@@ -109,8 +109,9 @@ struct bcm_op {
u32 count;
u32 nframes;
u32 currframe;
- struct canfd_frame *frames;
- struct canfd_frame *last_frames;
+ /* void pointers to arrays of struct can[fd]_frame */
+ void *frames;
+ void *last_frames;
struct canfd_frame sframe;
struct canfd_frame last_sframe;
struct sock *sk;
@@ -681,7 +682,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
if (op->flags & RX_FILTER_ID) {
/* the easiest case */
- bcm_rx_update_and_send(op, &op->last_frames[0], rxframe);
+ bcm_rx_update_and_send(op, op->last_frames, rxframe);
goto rx_starttimer;
}
@@ -1068,7 +1069,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (msg_head->nframes) {
/* update CAN frames content */
- err = memcpy_from_msg((u8 *)op->frames, msg,
+ err = memcpy_from_msg(op->frames, msg,
msg_head->nframes * op->cfsiz);
if (err < 0)
return err;
@@ -1118,7 +1119,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
if (msg_head->nframes) {
- err = memcpy_from_msg((u8 *)op->frames, msg,
+ err = memcpy_from_msg(op->frames, msg,
msg_head->nframes * op->cfsiz);
if (err < 0) {
if (op->frames != &op->sframe)
@@ -1163,6 +1164,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
/* check flags */
if (op->flags & RX_RTR_FRAME) {
+ struct canfd_frame *frame0 = op->frames;
/* no timers in RTR-mode */
hrtimer_cancel(&op->thrtimer);
@@ -1174,8 +1176,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
* prevent a full-load-loopback-test ... ;-]
*/
if ((op->flags & TX_CP_CAN_ID) ||
- (op->frames[0].can_id == op->can_id))
- op->frames[0].can_id = op->can_id & ~CAN_RTR_FLAG;
+ (frame0->can_id == op->can_id))
+ frame0->can_id = op->can_id & ~CAN_RTR_FLAG;
} else {
if (op->flags & SETTIMER) {
@@ -1549,24 +1551,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
struct bcm_sock *bo = bcm_sk(sk);
+ int ret = 0;
if (len < sizeof(*addr))
return -EINVAL;
- if (bo->bound)
- return -EISCONN;
+ lock_sock(sk);
+
+ if (bo->bound) {
+ ret = -EISCONN;
+ goto fail;
+ }
/* bind a device to this socket */
if (addr->can_ifindex) {
struct net_device *dev;
dev = dev_get_by_index(&init_net, addr->can_ifindex);
- if (!dev)
- return -ENODEV;
-
+ if (!dev) {
+ ret = -ENODEV;
+ goto fail;
+ }
if (dev->type != ARPHRD_CAN) {
dev_put(dev);
- return -ENODEV;
+ ret = -ENODEV;
+ goto fail;
}
bo->ifindex = dev->ifindex;
@@ -1577,17 +1586,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
bo->ifindex = 0;
}
- bo->bound = 1;
-
if (proc_dir) {
/* unique socket address as filename */
sprintf(bo->procname, "%lu", sock_i_ino(sk));
bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
proc_dir,
&bcm_proc_fops, sk);
+ if (!bo->bcm_proc_read) {
+ ret = -ENOMEM;
+ goto fail;
+ }
}
- return 0;
+ bo->bound = 1;
+
+fail:
+ release_sock(sk);
+
+ return ret;
}
static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index 7d54e944de5e..dcbe67ff3e2b 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -34,7 +34,8 @@ void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
fl->object_size = le32_to_cpu(legacy->fl_object_size);
fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
- if (fl->pool_id == 0)
+ if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
+ fl->stripe_count == 0 && fl->object_size == 0)
fl->pool_id = -1;
}
EXPORT_SYMBOL(ceph_file_layout_from_legacy);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d9bf7a1d0a58..e6ae15bc41b7 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4094,6 +4094,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
osd_init(&osdc->homeless_osd);
osdc->homeless_osd.o_osdc = osdc;
osdc->homeless_osd.o_osd = CEPH_HOMELESS_OSD;
+ osdc->last_linger_id = CEPH_LINGER_ID_START;
osdc->linger_requests = RB_ROOT;
osdc->map_checks = RB_ROOT;
osdc->linger_map_checks = RB_ROOT;
diff --git a/net/core/dev.c b/net/core/dev.c
index 820bac239738..6666b28b6815 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_orphan_frags(skb, GFP_ATOMIC) ||
- unlikely(!is_skb_forwardable(dev, skb))) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
+ int ret = ____dev_forward_skb(dev, skb);
- skb_scrub_packet(skb, true);
- skb->priority = 0;
- skb->protocol = eth_type_trans(skb, dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ if (likely(!ret)) {
+ skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(__dev_forward_skb);
@@ -2484,7 +2479,7 @@ int skb_checksum_help(struct sk_buff *skb)
goto out;
}
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 977489820eb9..047a1752ece1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2479,6 +2479,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GET_TS_INFO:
case ETHTOOL_GEEE:
case ETHTOOL_GTUNABLE:
+ case ETHTOOL_GLINKSETTINGS:
break;
default:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
diff --git a/net/core/filter.c b/net/core/filter.c
index 00351cdf7d0c..b391209838ef 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
return dev_forward_skb(dev, skb);
}
+static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ int ret = ____dev_forward_skb(dev, skb);
+
+ if (likely(!ret)) {
+ skb->dev = dev;
+ ret = netif_rx(skb);
+ }
+
+ return ret;
+}
+
static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
int ret;
@@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
return ret;
}
+static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ /* skb->mac_len is not set on normal egress */
+ unsigned int mlen = skb->network_header - skb->mac_header;
+
+ __skb_pull(skb, mlen);
+
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_pospull_rcsum has to be done in case that
+ * the skb is originated from ingress (i.e. a forwarded skb)
+ * to ensure that rcsum starts at net header.
+ */
+ if (!skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ skb_pop_mac_header(skb);
+ skb_reset_mac_len(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ bpf_push_mac_rcsum(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ switch (dev->type) {
+ case ARPHRD_TUNNEL:
+ case ARPHRD_TUNNEL6:
+ case ARPHRD_SIT:
+ case ARPHRD_IPGRE:
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+ return __bpf_redirect_no_mac(skb, dev, flags);
+ default:
+ return __bpf_redirect_common(skb, dev, flags);
+ }
+}
+
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
struct net_device *dev;
@@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
return -ENOMEM;
}
- bpf_push_mac_rcsum(clone);
-
- return flags & BPF_F_INGRESS ?
- __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone);
+ return __bpf_redirect(clone, dev, flags);
}
static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb)
return -EINVAL;
}
- bpf_push_mac_rcsum(skb);
-
- return ri->flags & BPF_F_INGRESS ?
- __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+ return __bpf_redirect(skb, dev, ri->flags);
}
static const struct bpf_func_proto bpf_redirect_proto = {
diff --git a/net/core/flow.c b/net/core/flow.c
index 3937b1b68d5b..18e8893d4be5 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -95,7 +95,6 @@ static void flow_cache_gc_task(struct work_struct *work)
list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
flow_entry_kill(fce, xfrm);
atomic_dec(&xfrm->flow_cache_gc_count);
- WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0);
}
}
@@ -236,9 +235,8 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
if (fcp->hash_count > fc->high_watermark)
flow_cache_shrink(fc, fcp);
- if (fcp->hash_count > 2 * fc->high_watermark ||
- atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) {
- atomic_inc(&net->xfrm.flow_cache_genid);
+ if (atomic_read(&net->xfrm.flow_cache_gc_count) >
+ 2 * num_online_cpus() * fc->high_watermark) {
flo = ERR_PTR(-ENOBUFS);
goto ret_object;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ab193e5def07..c6d8207ffa7e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_keyid *key_keyid;
bool skip_vlan = false;
u8 ip_proto = 0;
- bool ret = false;
+ bool ret;
if (!data) {
data = skb->data;
@@ -549,12 +549,17 @@ ip_proto_again:
out_good:
ret = true;
-out_bad:
+ key_control->thoff = (u16)nhoff;
+out:
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
- key_control->thoff = (u16)nhoff;
return ret;
+
+out_bad:
+ ret = false;
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
+ goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
@@ -1008,4 +1013,4 @@ static int __init init_default_flow_dissectors(void)
return 0;
}
-late_initcall_sync(init_default_flow_dissectors);
+core_initcall(init_default_flow_dissectors);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f61c0e02a413..7001da910c6b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -219,6 +219,8 @@ int peernet2id_alloc(struct net *net, struct net *peer)
bool alloc;
int id;
+ if (atomic_read(&net->count) == 0)
+ return NETNSA_NSID_NOT_ASSIGNED;
spin_lock_irqsave(&net->nsid_lock, flags);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fb7348f13501..a6196cf844f6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype)
rtnl_msg_handlers[protocol][msgindex].doit = NULL;
rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+ rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
return 0;
}
@@ -839,18 +840,20 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
(ext_filter_mask & RTEXT_FILTER_VF)) {
int num_vfs = dev_num_vf(dev->dev.parent);
- size_t size = nla_total_size(sizeof(struct nlattr));
- size += nla_total_size(num_vfs * sizeof(struct nlattr));
+ size_t size = nla_total_size(0);
size += num_vfs *
- (nla_total_size(sizeof(struct ifla_vf_mac)) +
- nla_total_size(MAX_VLAN_LIST_LEN *
- sizeof(struct nlattr)) +
+ (nla_total_size(0) +
+ nla_total_size(sizeof(struct ifla_vf_mac)) +
+ nla_total_size(sizeof(struct ifla_vf_vlan)) +
+ nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */
nla_total_size(MAX_VLAN_LIST_LEN *
sizeof(struct ifla_vf_vlan_info)) +
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+ nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
nla_total_size(sizeof(struct ifla_vf_rate)) +
nla_total_size(sizeof(struct ifla_vf_link_state)) +
nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
+ nla_total_size(0) + /* nest IFLA_VF_STATS */
/* IFLA_VF_STATS_RX_PACKETS */
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_TX_PACKETS */
@@ -898,7 +901,8 @@ static size_t rtnl_port_size(const struct net_device *dev,
static size_t rtnl_xdp_size(const struct net_device *dev)
{
- size_t xdp_size = nla_total_size(1); /* XDP_ATTACHED */
+ size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
+ nla_total_size(1); /* XDP_ATTACHED */
if (!dev->netdev_ops->ndo_xdp)
return 0;
@@ -927,8 +931,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_PROMISCUITY */
+ nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
+ nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
- + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */
- + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */
+ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1605,7 +1609,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
head = &net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (link_dump_filtered(dev, master_idx, kind_ops))
- continue;
+ goto cont;
if (idx < s_idx)
goto cont;
err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -2733,7 +2737,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
ext_filter_mask));
}
- return min_ifinfo_dump_size;
+ return nlmsg_total_size(min_ifinfo_dump_size);
}
static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
@@ -2848,7 +2852,10 @@ nla_put_failure:
static inline size_t rtnl_fdb_nlmsg_size(void)
{
- return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
+ return NLMSG_ALIGN(sizeof(struct ndmsg)) +
+ nla_total_size(ETH_ALEN) + /* NDA_LLADDR */
+ nla_total_size(sizeof(u16)) + /* NDA_VLAN */
+ 0;
}
static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
diff --git a/net/core/sock.c b/net/core/sock.c
index c73e28fc9c2a..00a074dbfe9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(sock_queue_rcv_skb);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
- const int nested, unsigned int trim_cap)
+ const int nested, unsigned int trim_cap, bool refcounted)
{
int rc = NET_RX_SUCCESS;
@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
bh_unlock_sock(sk);
out:
- sock_put(sk);
+ if (refcounted)
+ sock_put(sk);
return rc;
discard_and_relse:
kfree_skb(skb);
@@ -714,7 +715,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
+ sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
/* Wake up sending tasks if we upped the value. */
sk->sk_write_space(sk);
break;
@@ -750,7 +751,7 @@ set_rcvbuf:
* returning the value we actually used in getsockopt
* is the most desirable behavior.
*/
- sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
+ sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
break;
case SO_RCVBUFFORCE:
@@ -1543,6 +1544,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
newsk->sk_err = 0;
+ newsk->sk_err_soft = 0;
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 345a3aeb8c7e..edbe59d203ef 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (struct iphdr *)skb->data;
const u8 offset = iph->ihl << 2;
- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+ const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- if (skb->len < offset + sizeof(*dh) ||
- skb->len < offset + __dccp_basic_hdr_len(dh)) {
- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return;
- }
+ /* Only need dccph_dport & dccph_sport which are the first
+ * 4 bytes in dccp header.
+ * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
+ */
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet_lookup_established(net, &dccp_hashinfo,
iph->daddr, dh->dccph_dport,
@@ -698,6 +700,7 @@ int dccp_invalid_packet(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
unsigned int cscov;
+ u8 dccph_doff;
if (skb->pkt_type != PACKET_HOST)
return 1;
@@ -719,18 +722,19 @@ int dccp_invalid_packet(struct sk_buff *skb)
/*
* If P.Data Offset is too small for packet type, drop packet and return
*/
- if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
- DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff);
+ dccph_doff = dh->dccph_doff;
+ if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
+ DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff);
return 1;
}
/*
* If P.Data Offset is too too large for packet, drop packet and return
*/
- if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
- DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff);
+ if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) {
+ DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff);
return 1;
}
-
+ dh = dccp_hdr(skb);
/*
* If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
* has short sequence numbers), drop packet and return
@@ -868,7 +872,7 @@ lookup:
goto discard_and_relse;
nf_reset(skb);
- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 3828f94b234c..715e5d1dc107 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+ const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct ipv6_pinfo *np;
struct sock *sk;
@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
struct net *net = dev_net(skb->dev);
- if (skb->len < offset + sizeof(*dh) ||
- skb->len < offset + __dccp_basic_hdr_len(dh)) {
- __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
- return;
- }
+ /* Only need dccph_dport & dccph_sport which are the first
+ * 4 bytes in dccp header.
+ * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
+ */
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet6_lookup_established(net, &dccp_hashinfo,
&hdr->daddr, dh->dccph_dport,
@@ -738,7 +739,8 @@ lookup:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
+ refcounted) ? -1 : 0;
no_dccp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
+ .bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 41e65804ddf5..9fe25bf63296 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
__kfree_skb(skb);
}
+ /* If socket has been already reset kill it. */
+ if (sk->sk_state == DCCP_CLOSED)
+ goto adjudge_to_death;
+
if (data_was_unread) {
/* Unread data was tossed, send an appropriate Reset Code */
DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index a6902c1e2f28..7899919cd9f0 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -233,6 +233,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
genphy_read_status(phydev);
if (ds->ops->adjust_link)
ds->ops->adjust_link(ds, port, phydev);
+
+ put_device(&phydev->mdio.dev);
}
return 0;
@@ -504,15 +506,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
void dsa_cpu_dsa_destroy(struct device_node *port_dn)
{
- struct phy_device *phydev;
-
- if (of_phy_is_fixed_link(port_dn)) {
- phydev = of_phy_find_device(port_dn);
- if (phydev) {
- phy_device_free(phydev);
- fixed_phy_unregister(phydev);
- }
- }
+ if (of_phy_is_fixed_link(port_dn))
+ of_phy_deregister_fixed_link(port_dn);
}
static void dsa_switch_destroy(struct dsa_switch *ds)
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index f8a7d9aab437..5fff951a0a49 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -28,8 +28,10 @@ static struct dsa_switch_tree *dsa_get_dst(u32 tree)
struct dsa_switch_tree *dst;
list_for_each_entry(dst, &dsa_switch_trees, list)
- if (dst->tree == tree)
+ if (dst->tree == tree) {
+ kref_get(&dst->refcount);
return dst;
+ }
return NULL;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6b1282c006b1..30e2e21d7619 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1125,7 +1125,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
p->phy_interface = mode;
phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
- if (of_phy_is_fixed_link(port_dn)) {
+ if (!phy_dn && of_phy_is_fixed_link(port_dn)) {
/* In the case of a fixed PHY, the DT node associated
* to the fixed PHY is the Port DT node
*/
@@ -1135,7 +1135,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
return ret;
}
phy_is_fixed = true;
- phy_dn = port_dn;
+ phy_dn = of_node_get(port_dn);
}
if (ds->ops->get_phy_flags)
@@ -1154,6 +1154,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
if (ret) {
netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
+ of_node_put(phy_dn);
return ret;
}
} else {
@@ -1162,6 +1163,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
phy_flags,
p->phy_interface);
}
+
+ of_node_put(phy_dn);
}
if (p->phy && phy_is_fixed)
@@ -1174,6 +1177,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
ret = dsa_slave_phy_connect(p, slave_dev, p->port);
if (ret) {
netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret);
+ if (phy_is_fixed)
+ of_phy_deregister_fixed_link(port_dn);
return ret;
}
}
@@ -1289,10 +1294,18 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
void dsa_slave_destroy(struct net_device *slave_dev)
{
struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ struct dsa_switch *ds = p->parent;
+ struct device_node *port_dn;
+
+ port_dn = ds->ports[p->port].dn;
netif_carrier_off(slave_dev);
- if (p->phy)
+ if (p->phy) {
phy_disconnect(p->phy);
+
+ if (of_phy_is_fixed_link(port_dn))
+ of_phy_deregister_fixed_link(port_dn);
+ }
unregister_netdev(slave_dev);
free_netdev(slave_dev);
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 300b06888fdf..b54b3ca939db 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -715,6 +715,7 @@ config DEFAULT_TCP_CONG
default "reno" if DEFAULT_RENO
default "dctcp" if DEFAULT_DCTCP
default "cdg" if DEFAULT_CDG
+ default "bbr" if DEFAULT_BBR
default "cubic"
config TCP_MD5SIG
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9648c97e541f..215143246e4b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -533,9 +533,9 @@ EXPORT_SYMBOL(inet_dgram_connect);
static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
/* Basic assumption: if someone sets sk->sk_err, he _must_
@@ -545,13 +545,12 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
*/
while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
if (signal_pending(current) || !timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
return timeo;
}
@@ -1234,7 +1233,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
/* fixed ID is invalid if DF bit is not set */
- if (fixedid && !(iph->frag_off & htons(IP_DF)))
+ if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto out;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d95631d09248..20fb25e3027b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -476,7 +476,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
esph = (void *)skb_push(skb, 4);
*seqhi = esph->spi;
esph->spi = esph->seq_no;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c3b80478226e..161fc0f0d752 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -151,7 +151,7 @@ static void fib_replace_table(struct net *net, struct fib_table *old,
int fib_unmerge(struct net *net)
{
- struct fib_table *old, *new;
+ struct fib_table *old, *new, *main_table;
/* attempt to fetch local table if it has been allocated */
old = fib_get_table(net, RT_TABLE_LOCAL);
@@ -162,11 +162,21 @@ int fib_unmerge(struct net *net)
if (!new)
return -ENOMEM;
+ /* table is already unmerged */
+ if (new == old)
+ return 0;
+
/* replace merged table with clean table */
- if (new != old) {
- fib_replace_table(net, old, new);
- fib_free_table(old);
- }
+ fib_replace_table(net, old, new);
+ fib_free_table(old);
+
+ /* attempt to fetch main table if it has been allocated */
+ main_table = fib_get_table(net, RT_TABLE_MAIN);
+ if (!main_table)
+ return 0;
+
+ /* flush local entries from main table */
+ fib_table_flush_external(main_table);
return 0;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 31cef3602585..026f309c51e9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1743,8 +1743,10 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
local_l = fib_find_node(lt, &local_tp, l->key);
if (fib_insert_alias(lt, local_tp, local_l, new_fa,
- NULL, l->key))
+ NULL, l->key)) {
+ kmem_cache_free(fn_alias_kmem, new_fa);
goto out;
+ }
}
/* stop loop if key wrapped back to 0 */
@@ -1760,6 +1762,71 @@ out:
return NULL;
}
+/* Caller must hold RTNL */
+void fib_table_flush_external(struct fib_table *tb)
+{
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct key_vector *pn = t->kv;
+ unsigned long cindex = 1;
+ struct hlist_node *tmp;
+ struct fib_alias *fa;
+
+ /* walk trie in reverse order */
+ for (;;) {
+ unsigned char slen = 0;
+ struct key_vector *n;
+
+ if (!(cindex--)) {
+ t_key pkey = pn->key;
+
+ /* cannot resize the trie vector */
+ if (IS_TRIE(pn))
+ break;
+
+ /* resize completed node */
+ pn = resize(t, pn);
+ cindex = get_index(pkey, pn);
+
+ continue;
+ }
+
+ /* grab the next available node */
+ n = get_child(pn, cindex);
+ if (!n)
+ continue;
+
+ if (IS_TNODE(n)) {
+ /* record pn and cindex for leaf walking */
+ pn = n;
+ cindex = 1ul << n->bits;
+
+ continue;
+ }
+
+ hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
+ /* if alias was cloned to local then we just
+ * need to remove the local copy from main
+ */
+ if (tb->tb_id != fa->tb_id) {
+ hlist_del_rcu(&fa->fa_list);
+ alias_free_mem_rcu(fa);
+ continue;
+ }
+
+ /* record local slen */
+ slen = fa->fa_slen;
+ }
+
+ /* update leaf slen */
+ n->slen = slen;
+
+ if (hlist_empty(&n->leaf)) {
+ put_child_root(pn, n->key, NULL);
+ node_free(n);
+ }
+ }
+}
+
/* Caller must hold RTNL. */
int fib_table_flush(struct net *net, struct fib_table *tb)
{
@@ -2413,22 +2480,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
struct key_vector *l, **tp = &iter->tnode;
t_key key;
- /* use cache location of next-to-find key */
+ /* use cached location of previously found key */
if (iter->pos > 0 && pos >= iter->pos) {
- pos -= iter->pos;
key = iter->key;
} else {
- iter->pos = 0;
+ iter->pos = 1;
key = 0;
}
- while ((l = leaf_walk_rcu(tp, key)) != NULL) {
+ pos -= iter->pos;
+
+ while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
key = l->key + 1;
iter->pos++;
-
- if (--pos <= 0)
- break;
-
l = NULL;
/* handle unlikely case of a key wrap */
@@ -2437,7 +2501,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
}
if (l)
- iter->key = key; /* remember it */
+ iter->key = l->key; /* remember it */
else
iter->pos = 0; /* forget it */
@@ -2465,7 +2529,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
return fib_route_get_idx(iter, *pos);
iter->pos = 0;
- iter->key = 0;
+ iter->key = KEY_MAX;
return SEQ_START_TOKEN;
}
@@ -2474,7 +2538,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct fib_route_iter *iter = seq->private;
struct key_vector *l = NULL;
- t_key key = iter->key;
+ t_key key = iter->key + 1;
++*pos;
@@ -2483,7 +2547,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
l = leaf_walk_rcu(&iter->tnode, key);
if (l) {
- iter->key = l->key + 1;
+ iter->key = l->key;
iter->pos++;
} else {
iter->pos = 0;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 38abe70e595f..48734ee6293f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
+ fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key_hash(net, fl4,
@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- if (inet_addr_type_dev_table(net, skb_in->dev,
+ if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 606cc3e85d2b..15db786d50ed 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -162,7 +162,7 @@ static int unsolicited_report_interval(struct in_device *in_dev)
}
static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
-static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
+static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im);
static void igmpv3_clear_delrec(struct in_device *in_dev);
static int sf_setstate(struct ip_mc_list *pmc);
static void sf_markstate(struct ip_mc_list *pmc);
@@ -1130,10 +1130,15 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
spin_unlock_bh(&in_dev->mc_tomb_lock);
}
-static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
+/*
+ * restore ip_mc_list deleted records
+ */
+static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
{
struct ip_mc_list *pmc, *pmc_prev;
- struct ip_sf_list *psf, *psf_next;
+ struct ip_sf_list *psf;
+ struct net *net = dev_net(in_dev->dev);
+ __be32 multiaddr = im->multiaddr;
spin_lock_bh(&in_dev->mc_tomb_lock);
pmc_prev = NULL;
@@ -1149,16 +1154,26 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
in_dev->mc_tomb = pmc->next;
}
spin_unlock_bh(&in_dev->mc_tomb_lock);
+
+ spin_lock_bh(&im->lock);
if (pmc) {
- for (psf = pmc->tomb; psf; psf = psf_next) {
- psf_next = psf->sf_next;
- kfree(psf);
+ im->interface = pmc->interface;
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->sfmode = pmc->sfmode;
+ if (pmc->sfmode == MCAST_INCLUDE) {
+ im->tomb = pmc->tomb;
+ im->sources = pmc->sources;
+ for (psf = im->sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = im->crcount;
}
in_dev_put(pmc->interface);
- kfree(pmc);
}
+ spin_unlock_bh(&im->lock);
}
+/*
+ * flush ip_mc_list deleted records
+ */
static void igmpv3_clear_delrec(struct in_device *in_dev)
{
struct ip_mc_list *pmc, *nextpmc;
@@ -1366,7 +1381,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
ip_mc_hash_add(in_dev, im);
#ifdef CONFIG_IP_MULTICAST
- igmpv3_del_delrec(in_dev, im->multiaddr);
+ igmpv3_del_delrec(in_dev, im);
#endif
igmp_group_added(im);
if (!in_dev->dead)
@@ -1626,8 +1641,12 @@ void ip_mc_remap(struct in_device *in_dev)
ASSERT_RTNL();
- for_each_pmc_rtnl(in_dev, pmc)
+ for_each_pmc_rtnl(in_dev, pmc) {
+#ifdef CONFIG_IP_MULTICAST
+ igmpv3_del_delrec(in_dev, pmc);
+#endif
igmp_group_added(pmc);
+ }
}
/* Device going down */
@@ -1648,7 +1667,6 @@ void ip_mc_down(struct in_device *in_dev)
in_dev->mr_gq_running = 0;
if (del_timer(&in_dev->mr_gq_timer))
__in_dev_put(in_dev);
- igmpv3_clear_delrec(in_dev);
#endif
ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
@@ -1688,8 +1706,12 @@ void ip_mc_up(struct in_device *in_dev)
#endif
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
- for_each_pmc_rtnl(in_dev, pmc)
+ for_each_pmc_rtnl(in_dev, pmc) {
+#ifdef CONFIG_IP_MULTICAST
+ igmpv3_del_delrec(in_dev, pmc);
+#endif
igmp_group_added(pmc);
+ }
}
/*
@@ -1704,13 +1726,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
/* Deactivate timers */
ip_mc_down(in_dev);
+#ifdef CONFIG_IP_MULTICAST
+ igmpv3_clear_delrec(in_dev);
+#endif
while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
in_dev->mc_list = i->next_rcu;
in_dev->mc_count--;
-
- /* We've dropped the groups in ip_mc_down already */
- ip_mc_clear_src(i);
ip_ma_put(i);
}
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 8b4ffd216839..9f0a7b96646f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+ IPCB(skb)->flags |= IPSKB_FORWARDED;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (ip_exceeds_mtu(skb, mtu)) {
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 03e7f7310423..877bdb02e887 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -107,6 +107,8 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(!skb))
return 0;
+ skb->protocol = htons(ETH_P_IP);
+
return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst(skb)->dev,
dst_output);
@@ -239,19 +241,23 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *segs;
int ret = 0;
- /* common case: fragmentation of segments is not allowed,
- * or seglen is <= mtu
+ /* common case: seglen is <= mtu
*/
- if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
- skb_gso_validate_mtu(skb, mtu))
+ if (skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
- /* Slowpath - GSO segment length is exceeding the dst MTU.
+ /* Slowpath - GSO segment length exceeds the egress MTU.
*
- * This can happen in two cases:
- * 1) TCP GRO packet, DF bit not set
- * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
- * from host network stack.
+ * This can happen in several cases:
+ * - Forwarding of a TCP GRO skb, when DF flag is not set.
+ * - Forwarding of an skb that arrived on a virtualization interface
+ * (virtio-net/vhost/tap) with TSO/GSO size set by other network
+ * stack.
+ * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an
+ * interface with a smaller MTU.
+ * - Arriving GRO skb (or GSO skb in a virtualized environment) that is
+ * bridged to a NETIF_F_TSO tunnel stacked over an interface with an
+ * insufficent MTU.
*/
features = netif_skb_features(skb);
BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
@@ -1579,7 +1585,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
}
oif = arg->bound_dev_if;
- oif = oif ? : skb->skb_iif;
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+ oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark),
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 777bc1883870..fed3d29f9eb3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
int pkt_len = skb->len - skb_inner_network_offset(skb);
struct net *net = dev_net(rt->dst.dev);
struct net_device *dev = skb->dev;
- int skb_iif = skb->skb_iif;
struct iphdr *iph;
int err;
@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
skb_dst_set(skb, &rt->dst);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- if (skb_iif && !(df & htons(IP_DF))) {
- /* Arrived from an ingress interface, got encapsulated, with
- * fragmentation of encapulating frames allowed.
- * If skb is gso, the resulting encapsulated network segments
- * may exceed dst mtu.
- * Allow IP Fragmentation of segments.
- */
- IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
- }
-
/* Push down and install the IP header. */
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f006e13de56..27089f5ebbb1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->dev->stats.tx_bytes += skb->len;
}
- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+ IPCB(skb)->flags |= IPSKB_FORWARDED;
/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
* not only before forwarding, but after forwarding on all output
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c3776ff6749f..b3cc1335adbc 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -24,10 +24,11 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
__u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
+ struct net_device *dev = skb_dst(skb)->dev;
unsigned int hh_len;
if (addr_type == RTN_UNSPEC)
- addr_type = inet_addr_type(net, saddr);
+ addr_type = inet_addr_type_dev_table(net, dev, saddr);
if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
flags |= FLOWI_FLAG_ANYSRC;
else
@@ -40,6 +41,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+ if (!fl4.flowi4_oif)
+ fl4.flowi4_oif = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_flags = flags;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b31df597fd37..697538464e6e 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1201,8 +1201,8 @@ static int translate_compat_table(struct xt_table_info **pinfo,
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries;
pos = entry1;
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index bf855e64fc45..0c01a270bf9f 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
struct in_addr gw = {
.s_addr = (__force __be32)regs->data[priv->sreg_addr],
};
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
}
@@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 62d4d90c1389..2a57566e6e91 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
goto reject_redirect;
}
- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
+ n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+ if (!n)
+ n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
if (!IS_ERR(n)) {
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3251fe71f39f..814af89c1bd3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1164,7 +1164,7 @@ restart:
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
- goto out_err;
+ goto do_error;
sg = !!(sk->sk_route_caps & NETIF_F_SG);
@@ -1241,7 +1241,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1294af4e0127..f9038d6b109e 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -200,8 +200,10 @@ static void tcp_reinit_congestion_control(struct sock *sk,
icsk->icsk_ca_ops = ca;
icsk->icsk_ca_setsockopt = 1;
- if (sk->sk_state != TCP_CLOSE)
+ if (sk->sk_state != TCP_CLOSE) {
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
tcp_init_congestion_control(sk);
+ }
}
/* Manage refcounts on socket close. */
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 10d728b6804c..ab37c6775630 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -56,6 +56,7 @@ struct dctcp {
u32 next_seq;
u32 ce_state;
u32 delayed_ack_reserved;
+ u32 loss_cwnd;
};
static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->delayed_ack_reserved = 0;
+ ca->loss_cwnd = 0;
ca->ce_state = 0;
dctcp_reset(tp, ca);
@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
static u32 dctcp_ssthresh(struct sock *sk)
{
- const struct dctcp *ca = inet_csk_ca(sk);
+ struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}
@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
return 0;
}
+static u32 dctcp_cwnd_undo(struct sock *sk)
+{
+ const struct dctcp *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
static struct tcp_congestion_ops dctcp __read_mostly = {
.init = dctcp_init,
.in_ack_event = dctcp_update_alpha,
.cwnd_event = dctcp_cwnd_event,
.ssthresh = dctcp_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
+ .undo_cwnd = dctcp_cwnd_undo,
.set_state = dctcp_state,
.get_info = dctcp_get_info,
.flags = TCP_CONG_NEEDS_ECN,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 61b7be303eec..2259114c7242 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1564,6 +1564,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_add_backlog);
+int tcp_filter(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcphdr *th = (struct tcphdr *)skb->data;
+ unsigned int eaten = skb->len;
+ int err;
+
+ err = sk_filter_trim_cap(sk, skb, th->doff * 4);
+ if (!err) {
+ eaten -= skb->len;
+ TCP_SKB_CB(skb)->end_seq -= eaten;
+ }
+ return err;
+}
+EXPORT_SYMBOL(tcp_filter);
+
/*
* From tcp_input.c
*/
@@ -1676,8 +1691,10 @@ process:
nf_reset(skb);
- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);
skb->dev = NULL;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d123d68f4d1d..5bab6c3f7a2f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1455,7 +1455,7 @@ static void udp_v4_rehash(struct sock *sk)
udp_lib_rehash(sk, new_hash);
}
-static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
@@ -1652,10 +1652,10 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
if (use_hash2) {
hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
- udp_table.mask;
- hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+ udptable->mask;
+ hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udp_table.hash2[hash2];
+ hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 7e0fe4bdd967..feb50a16398d 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index af817158d830..ff450c2aad9b 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -50,7 +50,7 @@ struct proto udplite_prot = {
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
- .backlog_rcv = udp_queue_rcv_skb,
+ .backlog_rcv = __udp_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 060dd9922018..4bc5ba3ae452 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -183,7 +183,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(unsigned long data);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -2898,6 +2898,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
spin_unlock_bh(&ifp->lock);
+ rt_genid_bump_ipv6(dev_net(idev->dev));
ipv6_ifa_notify(RTM_NEWADDR, ifp);
in6_ifa_put(ifp);
}
@@ -3740,7 +3741,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
- bool notify = false;
+ bool bump_id, notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@ -3755,11 +3756,12 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
idev->cnf.accept_dad < 1 ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
+ bump_id = ifp->flags & IFA_F_TENTATIVE;
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
- addrconf_dad_completed(ifp);
+ addrconf_dad_completed(ifp, bump_id);
return;
}
@@ -3819,8 +3821,8 @@ static void addrconf_dad_work(struct work_struct *w)
struct inet6_ifaddr,
dad_work);
struct inet6_dev *idev = ifp->idev;
+ bool bump_id, disable_ipv6 = false;
struct in6_addr mcaddr;
- bool disable_ipv6 = false;
enum {
DAD_PROCESS,
@@ -3890,11 +3892,12 @@ static void addrconf_dad_work(struct work_struct *w)
* DAD was successful
*/
+ bump_id = ifp->flags & IFA_F_TENTATIVE;
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
write_unlock_bh(&idev->lock);
- addrconf_dad_completed(ifp);
+ addrconf_dad_completed(ifp, bump_id);
goto out;
}
@@ -3931,7 +3934,7 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
return true;
}
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
{
struct net_device *dev = ifp->idev->dev;
struct in6_addr lladdr;
@@ -3983,6 +3986,9 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
spin_unlock(&ifp->lock);
write_unlock_bh(&ifp->idev->lock);
}
+
+ if (bump_id)
+ rt_genid_bump_ipv6(dev_net(dev));
}
static void addrconf_dad_run(struct inet6_dev *idev)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 37874e2f30ed..ccf40550c475 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -139,7 +139,8 @@ void ip6_datagram_release_cb(struct sock *sk)
}
EXPORT_SYMBOL_GPL(ip6_datagram_release_cb);
-static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -252,6 +253,7 @@ ipv4_connected:
out:
return err;
}
+EXPORT_SYMBOL_GPL(__ip6_datagram_connect);
int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 060a60b2f8a6..111ba55fd512 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -418,7 +418,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
esph = (void *)skb_push(skb, 4);
*seqhi = esph->spi;
esph->spi = esph->seq_no;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index bd59c343d35f..2772004ba5a1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -447,8 +447,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (__ipv6_addr_needs_scope_id(addr_type))
iif = skb->dev->ifindex;
- else
- iif = l3mdev_master_ifindex(skb->dev);
+ else {
+ dst = skb_dst(skb);
+ iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
+ }
/*
* Must not send error if the source does not uniquely
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 1fcf61f1cbc3..89c59e656f44 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -99,7 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
segs = ops->callbacks.gso_segment(skb, features);
}
- if (IS_ERR(segs))
+ if (IS_ERR_OR_NULL(segs))
goto out;
gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6001e781164e..59eb4ed99ce8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1366,7 +1366,7 @@ emsgsize:
if (((length > mtu) ||
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) &&
+ (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
(sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, exthdrlen,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 87784560dc46..d76674efe523 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1034,6 +1034,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
int mtu;
unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
unsigned int max_headroom = psh_hlen;
+ bool use_cache = false;
u8 hop_limit;
int err = -1;
@@ -1066,7 +1067,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
- } else if (!fl6->flowi6_mark)
+ } else if (!(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only only if the routing decision does
+ * not depend on the current inner header value
+ */
+ use_cache = true;
+ }
+
+ if (use_cache)
dst = dst_cache_get(&t->dst_cache);
if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
@@ -1150,7 +1159,7 @@ route_lookup:
if (t->encap.type != TUNNEL_ENCAP_NONE)
goto tx_err_dst_release;
} else {
- if (!fl6->flowi6_mark && ndst)
+ if (use_cache && ndst)
dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
}
skb_dst_set(skb, dst);
@@ -1172,7 +1181,6 @@ route_lookup:
if (err)
return err;
- skb->protocol = htons(ETH_P_IPV6);
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index a7520528ecd2..b283f293ee4a 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -88,9 +88,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
uh->len = htons(skb->len);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
- | IPSKB_REROUTED);
skb_dst_set(skb, dst);
udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8a02ca8a11af..c299c1e2bbf0 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1138,6 +1138,33 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
.priority = 100,
};
+static bool is_vti6_tunnel(const struct net_device *dev)
+{
+ return dev->netdev_ops == &vti6_netdev_ops;
+}
+
+static int vti6_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ if (!is_vti6_tunnel(dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_DOWN:
+ if (!net_eq(t->net, dev_net(dev)))
+ xfrm_garbage_collect(t->net);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block vti6_notifier_block __read_mostly = {
+ .notifier_call = vti6_device_event,
+};
+
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -1148,6 +1175,8 @@ static int __init vti6_tunnel_init(void)
const char *msg;
int err;
+ register_netdevice_notifier(&vti6_notifier_block);
+
msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops);
if (err < 0)
@@ -1180,6 +1209,7 @@ xfrm_proto_ah_failed:
xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops);
pernet_dev_failed:
+ unregister_netdevice_notifier(&vti6_notifier_block);
pr_err("vti6 init: failed to register %s\n", msg);
return err;
}
@@ -1194,6 +1224,7 @@ static void __exit vti6_tunnel_cleanup(void)
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops);
+ unregister_netdevice_notifier(&vti6_notifier_block);
}
module_init(vti6_tunnel_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e4347aeb2e65..9948b5ce52da 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -576,11 +576,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
/* Jumbo payload inhibits frag. header */
if (ipv6_hdr(skb)->payload_len == 0) {
pr_debug("payload len = 0\n");
- return -EINVAL;
+ return 0;
}
if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
- return -EINVAL;
+ return 0;
if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
return -ENOMEM;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index f7aab5ab93a5..f06b0471f39f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -69,7 +69,7 @@ static unsigned int ipv6_defrag(void *priv,
if (err == -EINPROGRESS)
return NF_STOLEN;
- return NF_ACCEPT;
+ return err == 0 ? NF_ACCEPT : NF_DROP;
}
static struct nf_hook_ops ipv6_defrag_ops[] = {
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index a5400223fd74..10090400c72f 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -156,6 +156,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.daddr = oip6h->saddr;
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
+ fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 8bfd470cbe72..831f86e1ec08 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
}
@@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 7cca8ac66fe9..cd4252346a32 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -155,6 +155,8 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(!skb))
return 0;
+ skb->protocol = htons(ETH_P_IPV6);
+
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst(skb)->dev,
dst_output);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 947ed1ded026..1b57e11e6e0d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1364,6 +1364,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (rt6->rt6i_flags & RTF_LOCAL)
return;
+ if (dst_metric_locked(dst, RTAX_MTU))
+ return;
+
dst_confirm(dst);
mtu = max_t(u32, mtu, IPV6_MIN_MTU);
if (mtu >= dst_mtu(dst))
@@ -2758,6 +2761,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
PMTU discouvery.
*/
if (rt->dst.dev == arg->dev &&
+ dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
if (rt->rt6i_flags & RTF_CACHE) {
/* For RTF_CACHE with rt6i_pmtu == 0
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5a27ab4eab39..b9f1fee9a886 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -818,8 +818,12 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = tcp_v6_iif(skb);
- else
- fl6.flowi6_oif = oif ? : skb->skb_iif;
+ else {
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+ oif = skb->skb_iif;
+
+ fl6.flowi6_oif = oif;
+ }
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
@@ -1225,7 +1229,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard;
/*
@@ -1453,8 +1457,10 @@ process:
if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard_and_relse;
- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ hdr = ipv6_hdr(skb);
skb->dev = NULL;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b2ef061e6836..e4a8000d59ad 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -514,7 +514,7 @@ out:
return;
}
-static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
@@ -706,10 +706,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
if (use_hash2) {
hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
- udp_table.mask;
- hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+ udptable->mask;
+ hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udp_table.hash2[hash2];
+ hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index f6eb1ab34f4b..e78bdc76dcc3 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udpv6_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 47d0d2b87106..2f5101a12283 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -45,7 +45,7 @@ struct proto udplitev6_prot = {
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
- .backlog_rcv = udpv6_queue_rcv_skb,
+ .backlog_rcv = __udpv6_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index fce25afb652a..8938b6ba57a0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -61,7 +61,8 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif
if ((l2tp->conn_id == tunnel_id) &&
net_eq(sock_net(sk), net) &&
!(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ (!sk->sk_bound_dev_if || !dif ||
+ sk->sk_bound_dev_if == dif))
goto found;
}
@@ -182,15 +183,17 @@ pass_up:
struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
read_lock_bh(&l2tp_ip_lock);
- sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id);
+ sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb),
+ tunnel_id);
+ if (!sk) {
+ read_unlock_bh(&l2tp_ip_lock);
+ goto discard;
+ }
+
+ sock_hold(sk);
read_unlock_bh(&l2tp_ip_lock);
}
- if (sk == NULL)
- goto discard;
-
- sock_hold(sk);
-
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -251,22 +254,17 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret;
int chk_addr_ret;
- if (!sock_flag(sk, SOCK_ZAPPED))
- return -EINVAL;
if (addr_len < sizeof(struct sockaddr_l2tpip))
return -EINVAL;
if (addr->l2tp_family != AF_INET)
return -EINVAL;
- ret = -EADDRINUSE;
- read_lock_bh(&l2tp_ip_lock);
- if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
- sk->sk_bound_dev_if, addr->l2tp_conn_id))
- goto out_in_use;
+ lock_sock(sk);
- read_unlock_bh(&l2tp_ip_lock);
+ ret = -EINVAL;
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ goto out;
- lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
goto out;
@@ -280,14 +278,22 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->inet_saddr = 0; /* Use device */
- sk_dst_reset(sk);
+ write_lock_bh(&l2tp_ip_lock);
+ if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
+ sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
+ write_unlock_bh(&l2tp_ip_lock);
+ ret = -EADDRINUSE;
+ goto out;
+ }
+
+ sk_dst_reset(sk);
l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
- write_lock_bh(&l2tp_ip_lock);
sk_add_bind_node(sk, &l2tp_ip_bind_table);
sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip_lock);
+
ret = 0;
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -295,11 +301,6 @@ out:
release_sock(sk);
return ret;
-
-out_in_use:
- read_unlock_bh(&l2tp_ip_lock);
-
- return ret;
}
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -307,21 +308,24 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
int rc;
- if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
- return -EINVAL;
-
if (addr_len < sizeof(*lsa))
return -EINVAL;
if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
return -EINVAL;
- rc = ip4_datagram_connect(sk, uaddr, addr_len);
- if (rc < 0)
- return rc;
-
lock_sock(sk);
+ /* Must bind first - autobinding does not work */
+ if (sock_flag(sk, SOCK_ZAPPED)) {
+ rc = -EINVAL;
+ goto out_sk;
+ }
+
+ rc = __ip4_datagram_connect(sk, uaddr, addr_len);
+ if (rc < 0)
+ goto out_sk;
+
l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
write_lock_bh(&l2tp_ip_lock);
@@ -329,7 +333,9 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
sk_add_bind_node(sk, &l2tp_ip_bind_table);
write_unlock_bh(&l2tp_ip_lock);
+out_sk:
release_sock(sk);
+
return rc;
}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ad3468c32b53..aa821cb639e5 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -72,8 +72,9 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
if ((l2tp->conn_id == tunnel_id) &&
net_eq(sock_net(sk), net) &&
- !(addr && ipv6_addr_equal(addr, laddr)) &&
- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ (!addr || ipv6_addr_equal(addr, laddr)) &&
+ (!sk->sk_bound_dev_if || !dif ||
+ sk->sk_bound_dev_if == dif))
goto found;
}
@@ -196,16 +197,17 @@ pass_up:
struct ipv6hdr *iph = ipv6_hdr(skb);
read_lock_bh(&l2tp_ip6_lock);
- sk = __l2tp_ip6_bind_lookup(net, &iph->daddr,
- 0, tunnel_id);
+ sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb),
+ tunnel_id);
+ if (!sk) {
+ read_unlock_bh(&l2tp_ip6_lock);
+ goto discard;
+ }
+
+ sock_hold(sk);
read_unlock_bh(&l2tp_ip6_lock);
}
- if (sk == NULL)
- goto discard;
-
- sock_hold(sk);
-
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -266,11 +268,10 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *) uaddr;
struct net *net = sock_net(sk);
__be32 v4addr = 0;
+ int bound_dev_if;
int addr_type;
int err;
- if (!sock_flag(sk, SOCK_ZAPPED))
- return -EINVAL;
if (addr->l2tp_family != AF_INET6)
return -EINVAL;
if (addr_len < sizeof(*addr))
@@ -286,41 +287,34 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_type & IPV6_ADDR_MULTICAST)
return -EADDRNOTAVAIL;
- err = -EADDRINUSE;
- read_lock_bh(&l2tp_ip6_lock);
- if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr,
- sk->sk_bound_dev_if, addr->l2tp_conn_id))
- goto out_in_use;
- read_unlock_bh(&l2tp_ip6_lock);
-
lock_sock(sk);
err = -EINVAL;
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ goto out_unlock;
+
if (sk->sk_state != TCP_CLOSE)
goto out_unlock;
+ bound_dev_if = sk->sk_bound_dev_if;
+
/* Check if the address belongs to the host. */
rcu_read_lock();
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
if (addr_type & IPV6_ADDR_LINKLOCAL) {
- if (addr_len >= sizeof(struct sockaddr_in6) &&
- addr->l2tp_scope_id) {
- /* Override any existing binding, if another
- * one is supplied by user.
- */
- sk->sk_bound_dev_if = addr->l2tp_scope_id;
- }
+ if (addr->l2tp_scope_id)
+ bound_dev_if = addr->l2tp_scope_id;
/* Binding to link-local address requires an
- interface */
- if (!sk->sk_bound_dev_if)
+ * interface.
+ */
+ if (!bound_dev_if)
goto out_unlock_rcu;
err = -ENODEV;
- dev = dev_get_by_index_rcu(sock_net(sk),
- sk->sk_bound_dev_if);
+ dev = dev_get_by_index_rcu(sock_net(sk), bound_dev_if);
if (!dev)
goto out_unlock_rcu;
}
@@ -335,13 +329,22 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
rcu_read_unlock();
- inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
+ write_lock_bh(&l2tp_ip6_lock);
+ if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if,
+ addr->l2tp_conn_id)) {
+ write_unlock_bh(&l2tp_ip6_lock);
+ err = -EADDRINUSE;
+ goto out_unlock;
+ }
+
+ inet->inet_saddr = v4addr;
+ inet->inet_rcv_saddr = v4addr;
+ sk->sk_bound_dev_if = bound_dev_if;
sk->sk_v6_rcv_saddr = addr->l2tp_addr;
np->saddr = addr->l2tp_addr;
l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
- write_lock_bh(&l2tp_ip6_lock);
sk_add_bind_node(sk, &l2tp_ip6_bind_table);
sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip6_lock);
@@ -354,10 +357,7 @@ out_unlock_rcu:
rcu_read_unlock();
out_unlock:
release_sock(sk);
- return err;
-out_in_use:
- read_unlock_bh(&l2tp_ip6_lock);
return err;
}
@@ -370,9 +370,6 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_type;
int rc;
- if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
- return -EINVAL;
-
if (addr_len < sizeof(*lsa))
return -EINVAL;
@@ -389,10 +386,18 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
return -EINVAL;
}
- rc = ip6_datagram_connect(sk, uaddr, addr_len);
-
lock_sock(sk);
+ /* Must bind first - autobinding does not work */
+ if (sock_flag(sk, SOCK_ZAPPED)) {
+ rc = -EINVAL;
+ goto out_sk;
+ }
+
+ rc = __ip6_datagram_connect(sk, uaddr, addr_len);
+ if (rc < 0)
+ goto out_sk;
+
l2tp_ip6_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
write_lock_bh(&l2tp_ip6_lock);
@@ -400,6 +405,7 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
sk_add_bind_node(sk, &l2tp_ip6_bind_table);
write_unlock_bh(&l2tp_ip6_lock);
+out_sk:
release_sock(sk);
return rc;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 78e9ecbc96e6..8e05032689f0 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
}
/* No need to do anything if the driver does all */
- if (!local->ops->set_tim)
+ if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
return;
if (sta->dead)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1c56abc49627..bd5f4be89435 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1501,7 +1501,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
struct sta_info *sta,
struct sk_buff *skb)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct fq *fq = &local->fq;
struct ieee80211_vif *vif;
struct txq_info *txqi;
@@ -1526,8 +1525,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
if (!txqi)
return false;
- info->control.vif = vif;
-
spin_lock_bh(&fq->lock);
ieee80211_txq_enqueue(local, txqi, skb);
spin_unlock_bh(&fq->lock);
@@ -3213,7 +3210,6 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
- *ieee80211_get_qos_ctl(hdr) = tid;
hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
} else {
info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
@@ -3338,6 +3334,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
(tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ *ieee80211_get_qos_ctl(hdr) = tid;
+ }
+
__skb_queue_head_init(&tx.skbs);
tx.flags = IEEE80211_TX_UNICAST;
@@ -3426,6 +3427,11 @@ begin:
goto begin;
}
+ if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
+ info->flags |= IEEE80211_TX_CTL_AMPDU;
+ else
+ info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+
if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
struct sta_info *sta = container_of(txq->sta, struct sta_info,
sta);
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index ee715764a828..6832bf6ab69f 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -270,6 +270,22 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2);
}
+ /*
+ * This is a workaround for VHT-enabled STAs which break the spec
+ * and have the VHT-MCS Rx map filled in with value 3 for all eight
+ * spacial streams, an example is AR9462.
+ *
+ * As per spec, in section 22.1.1 Introduction to the VHT PHY
+ * A VHT STA shall support at least single spactial stream VHT-MCSs
+ * 0 to 7 (transmit and receive) in all supported channel widths.
+ */
+ if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) {
+ vht_cap->vht_supported = false;
+ sdata_info(sdata, "Ignoring VHT IE from %pM due to invalid rx_mcs_map\n",
+ sta->addr);
+ return;
+ }
+
/* finally set up the bandwidth */
switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c3c809b2e712..a6e44ef2ec9a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = {
.hdrsize = 0,
.name = IPVS_GENL_NAME,
.version = IPVS_GENL_VERSION,
- .maxattr = IPVS_CMD_MAX,
+ .maxattr = IPVS_CMD_ATTR_MAX,
.netnsok = true, /* Make ipvsadm to work on netns */
};
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 1b07578bedf3..9350530c16c1 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -283,6 +283,7 @@ struct ip_vs_sync_buff {
*/
static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
{
+ memset(ho, 0, sizeof(*ho));
ho->init_seq = get_unaligned_be32(&no->init_seq);
ho->delta = get_unaligned_be32(&no->delta);
ho->previous_delta = get_unaligned_be32(&no->previous_delta);
@@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa
kfree(param->pe_data);
}
- if (opt)
- memcpy(&cp->in_seq, opt, sizeof(*opt));
+ if (opt) {
+ cp->in_seq = opt->in_seq;
+ cp->out_seq = opt->out_seq;
+ }
atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
cp->state = state;
cp->old_state = cp->state;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df2f5a3901df..0f87e5d21be7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
struct delayed_work dwork;
u32 last_bucket;
bool exiting;
+ long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
@@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
#define GC_MAX_BUCKETS_DIV 64u
-#define GC_MAX_BUCKETS 8192u
-#define GC_INTERVAL (5 * HZ)
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX (2 * HZ)
+/* maximum conntracks to evict per gc run */
#define GC_MAX_EVICTS 256u
static struct conntrack_gc_work conntrack_gc_work;
@@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
static void gc_worker(struct work_struct *work)
{
unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned long next_run = GC_INTERVAL;
- unsigned int ratio, scanned = 0;
struct conntrack_gc_work *gc_work;
+ unsigned int ratio, scanned = 0;
+ unsigned long next_run;
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;
do {
@@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work)
if (gc_work->exiting)
return;
+ /*
+ * Eviction will normally happen from the packet path, and not
+ * from this gc worker.
+ *
+ * This worker is only here to reap expired entries when system went
+ * idle after a busy period.
+ *
+ * The heuristics below are supposed to balance conflicting goals:
+ *
+ * 1. Minimize time until we notice a stale entry
+ * 2. Maximize scan intervals to not waste cycles
+ *
+ * Normally, expired_count will be 0, this increases the next_run time
+ * to priorize 2) above.
+ *
+ * As soon as a timed-out entry is found, move towards 1) and increase
+ * the scan frequency.
+ * In case we have lots of evictions next scan is done immediately.
+ */
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
+ if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+ gc_work->next_gc_run = 0;
next_run = 0;
+ } else if (expired_count) {
+ gc_work->next_gc_run /= 2U;
+ next_run = msecs_to_jiffies(1);
+ } else {
+ if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+ gc_work->next_gc_run += msecs_to_jiffies(1);
+
+ next_run = gc_work->next_gc_run;
+ }
gc_work->last_bucket = i;
- schedule_delayed_work(&gc_work->dwork, next_run);
+ queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->next_gc_run = GC_INTERVAL_MAX;
gc_work->exiting = false;
}
@@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+ queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 336e21559e01..7341adf7059d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
for (i = 0; i < nf_ct_helper_hsize; i++) {
hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
- if (!strcmp(h->name, name) &&
- h->tuple.src.l3num == l3num &&
- h->tuple.dst.protonum == protonum)
+ if (strcmp(h->name, name))
+ continue;
+
+ if (h->tuple.src.l3num != NFPROTO_UNSPEC &&
+ h->tuple.src.l3num != l3num)
+ continue;
+
+ if (h->tuple.dst.protonum == protonum)
return h;
}
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 621b81c7bddc..c3fc14e021ec 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
handler = &sip_handlers[i];
if (handler->request == NULL)
continue;
- if (*datalen < handler->len ||
+ if (*datalen < handler->len + 2 ||
strncasecmp(*dptr, handler->method, handler->len))
continue;
+ if ((*dptr)[handler->len] != ' ' ||
+ !isalpha((*dptr)[handler->len+1]))
+ continue;
if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
&matchoff, &matchlen) <= 0) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index bbb8f3df79f7..5b9c884a452e 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -42,7 +42,7 @@ struct nf_nat_conn_key {
const struct nf_conntrack_zone *zone;
};
-static struct rhashtable nf_nat_bysource_table;
+static struct rhltable nf_nat_bysource_table;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -193,9 +193,12 @@ static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
const struct nf_nat_conn_key *key = arg->key;
const struct nf_conn *ct = obj;
- return same_src(ct, key->tuple) &&
- net_eq(nf_ct_net(ct), key->net) &&
- nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL);
+ if (!same_src(ct, key->tuple) ||
+ !net_eq(nf_ct_net(ct), key->net) ||
+ !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
+ return 1;
+
+ return 0;
}
static struct rhashtable_params nf_nat_bysource_params = {
@@ -204,7 +207,6 @@ static struct rhashtable_params nf_nat_bysource_params = {
.obj_cmpfn = nf_nat_bysource_cmp,
.nelem_hint = 256,
.min_size = 1024,
- .nulls_base = (1U << RHT_BASE_SHIFT),
};
/* Only called for SRC manip */
@@ -223,12 +225,15 @@ find_appropriate_src(struct net *net,
.tuple = tuple,
.zone = zone
};
+ struct rhlist_head *hl;
- ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key,
- nf_nat_bysource_params);
- if (!ct)
+ hl = rhltable_lookup(&nf_nat_bysource_table, &key,
+ nf_nat_bysource_params);
+ if (!hl)
return 0;
+ ct = container_of(hl, typeof(*ct), nat_bysource);
+
nf_ct_invert_tuplepr(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
@@ -446,11 +451,17 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
+ struct nf_nat_conn_key key = {
+ .net = nf_ct_net(ct),
+ .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ .zone = nf_ct_zone(ct),
+ };
int err;
- err = rhashtable_insert_fast(&nf_nat_bysource_table,
- &ct->nat_bysource,
- nf_nat_bysource_params);
+ err = rhltable_insert_key(&nf_nat_bysource_table,
+ &key,
+ &ct->nat_bysource,
+ nf_nat_bysource_params);
if (err)
return NF_DROP;
}
@@ -567,8 +578,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* will delete entry from already-freed table.
*/
ct->status &= ~IPS_NAT_DONE_MASK;
- rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -698,8 +709,8 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
if (!nat)
return;
- rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -834,13 +845,13 @@ static int __init nf_nat_init(void)
{
int ret;
- ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
+ ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
if (ret)
return ret;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- rhashtable_destroy(&nf_nat_bysource_table);
+ rhltable_destroy(&nf_nat_bysource_table);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -864,7 +875,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
- rhashtable_destroy(&nf_nat_bysource_table);
+ rhltable_destroy(&nf_nat_bysource_table);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -883,7 +894,7 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
- rhashtable_destroy(&nf_nat_bysource_table);
+ rhltable_destroy(&nf_nat_bysource_table);
}
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 24db22257586..e5194f6f906c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2570,7 +2570,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
}
if (set->timeout &&
- nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout),
+ nla_put_be64(skb, NFTA_SET_TIMEOUT,
+ cpu_to_be64(jiffies_to_msecs(set->timeout)),
NFTA_SET_PAD))
goto nla_put_failure;
if (set->gc_int &&
@@ -2859,7 +2860,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+ timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
+ nla[NFTA_SET_TIMEOUT])));
}
gc_int = 0;
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
@@ -2956,12 +2958,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
+err3:
+ ops->destroy(set);
err2:
kfree(set);
err1:
@@ -3176,7 +3180,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
- cpu_to_be64(*nft_set_ext_timeout(ext)),
+ cpu_to_be64(jiffies_to_msecs(
+ *nft_set_ext_timeout(ext))),
NFTA_SET_ELEM_PAD))
goto nla_put_failure;
@@ -3445,21 +3450,22 @@ void *nft_set_elem_init(const struct nft_set *set,
memcpy(nft_set_ext_data(ext), data, set->dlen);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
*nft_set_ext_expiration(ext) =
- jiffies + msecs_to_jiffies(timeout);
+ jiffies + timeout;
if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
*nft_set_ext_timeout(ext) = timeout;
return elem;
}
-void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_uninit(nft_set_ext_data(ext), set->dtype);
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+ if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
kfree(elem);
@@ -3532,7 +3538,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
return -EINVAL;
- timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+ timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
+ nla[NFTA_SET_ELEM_TIMEOUT])));
} else if (set->flags & NFT_SET_TIMEOUT) {
timeout = set->timeout;
}
@@ -3565,6 +3572,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
dreg = nft_type_to_reg(set->dtype);
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
+ .net = ctx->net,
.afi = ctx->afi,
.table = ctx->table,
.chain = (struct nft_chain *)binding->chain,
@@ -3812,7 +3820,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu)
gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
kfree(gcb);
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
@@ -4030,7 +4038,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
@@ -4171,7 +4179,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
@@ -4421,7 +4429,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
* Otherwise a 0 is returned and the attribute value is stored in the
* destination variable.
*/
-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
{
u32 val;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 517f08767a3c..31ca94793aa9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
&regs->data[priv->sreg_key],
&regs->data[priv->sreg_data],
timeout, GFP_ATOMIC);
- if (elem == NULL) {
- if (set->size)
- atomic_dec(&set->nelems);
- return NULL;
- }
+ if (elem == NULL)
+ goto err1;
ext = nft_set_elem_ext(set, elem);
if (priv->expr != NULL &&
nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
- return NULL;
+ goto err2;
return elem;
+
+err2:
+ nft_set_elem_destroy(set, elem, false);
+err1:
+ if (set->size)
+ atomic_dec(&set->nelems);
+ return NULL;
}
static void nft_dynset_eval(const struct nft_expr *expr,
@@ -139,6 +143,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}
+ if (set->ops->update == NULL)
+ return -EOPNOTSUPP;
+
if (set->flags & NFT_SET_CONSTANT)
return -EBUSY;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index baf694de3935..d5447a22275c 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -53,6 +53,7 @@ static int nft_hash_init(const struct nft_ctx *ctx,
{
struct nft_hash *priv = nft_expr_priv(expr);
u32 len;
+ int err;
if (!tb[NFTA_HASH_SREG] ||
!tb[NFTA_HASH_DREG] ||
@@ -67,8 +68,10 @@ static int nft_hash_init(const struct nft_ctx *ctx,
priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
- len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
- if (len == 0 || len > U8_MAX)
+ err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+ if (len == 0)
return -ERANGE;
priv->len = len;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index fbc88009ca2e..8f0aaaea1376 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -59,6 +59,12 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
int err;
u32 op;
+ if (!tb[NFTA_RANGE_SREG] ||
+ !tb[NFTA_RANGE_OP] ||
+ !tb[NFTA_RANGE_FROM_DATA] ||
+ !tb[NFTA_RANGE_TO_DATA])
+ return -EINVAL;
+
err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
&desc_from, tb[NFTA_RANGE_FROM_DATA]);
if (err < 0)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3794cb2fc788..a3dface3e6e6 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
+ struct nft_hash_elem *he, *prev;
struct nft_hash_cmp_arg arg = {
.genmask = NFT_GENMASK_ANY,
.set = set,
@@ -112,15 +112,24 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
he = new(set, expr, regs);
if (he == NULL)
goto err1;
- if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params))
+
+ prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
+ if (IS_ERR(prev))
goto err2;
+
+ /* Another cpu may race to insert the element with the same key */
+ if (prev) {
+ nft_set_elem_destroy(set, he, true);
+ he = prev;
+ }
+
out:
*ext = &he->ext;
return true;
err2:
- nft_set_elem_destroy(set, he);
+ nft_set_elem_destroy(set, he, true);
err1:
return false;
}
@@ -332,7 +341,7 @@ static int nft_hash_init(const struct nft_set *set,
static void nft_hash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr, true);
}
static void nft_hash_destroy(const struct nft_set *set)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 38b5bda242f8..36493a7cae88 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe);
+ nft_set_elem_destroy(set, rbe, true);
}
}
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 69f78e96fdb4..b83e158e116a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
u_int32_t newmark;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (ct == NULL || nf_ct_is_untracked(ct))
return XT_CONTINUE;
switch (info->mode) {
@@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (ct == NULL || nf_ct_is_untracked(ct))
return false;
return ((ct->mark & info->mask) == info->mark) ^ info->invert;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 62bea4591054..602e5ebe9db3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -322,14 +322,11 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
sk_mem_charge(sk, skb->truesize);
}
-static void netlink_sock_destruct(struct sock *sk)
+static void __netlink_sock_destruct(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
if (nlk->cb_running) {
- if (nlk->cb.done)
- nlk->cb.done(&nlk->cb);
-
module_put(nlk->cb.module);
kfree_skb(nlk->cb.skb);
}
@@ -346,6 +343,28 @@ static void netlink_sock_destruct(struct sock *sk)
WARN_ON(nlk_sk(sk)->groups);
}
+static void netlink_sock_destruct_work(struct work_struct *work)
+{
+ struct netlink_sock *nlk = container_of(work, struct netlink_sock,
+ work);
+
+ nlk->cb.done(&nlk->cb);
+ __netlink_sock_destruct(&nlk->sk);
+}
+
+static void netlink_sock_destruct(struct sock *sk)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (nlk->cb_running && nlk->cb.done) {
+ INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+ schedule_work(&nlk->work);
+ return;
+ }
+
+ __netlink_sock_destruct(sk);
+}
+
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
* SMP. Look, when several writers sleep and reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 3cfd6cc60504..4fdb38318977 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -3,6 +3,7 @@
#include <linux/rhashtable.h>
#include <linux/atomic.h>
+#include <linux/workqueue.h>
#include <net/sock.h>
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
@@ -33,6 +34,7 @@ struct netlink_sock {
struct rhash_head node;
struct rcu_head rcu;
+ struct work_struct work;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index b2f0e986a6f4..a5546249fb10 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -178,11 +178,8 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
cb->args[1] = i;
} else {
- if (req->sdiag_protocol >= MAX_LINKS) {
- read_unlock(&nl_table_lock);
- rcu_read_unlock();
+ if (req->sdiag_protocol >= MAX_LINKS)
return -ENOENT;
- }
err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 23cc12639ba7..49c28e8ef01b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -404,7 +404,7 @@ int __genl_register_family(struct genl_family *family)
err = genl_validate_assign_mc_groups(family);
if (err)
- goto errout_locked;
+ goto errout_free;
list_add_tail(&family->family_list, genl_family_chain(family->id));
genl_unlock_all();
@@ -417,6 +417,8 @@ int __genl_register_family(struct genl_family *family)
return 0;
+errout_free:
+ kfree(family->attrbuf);
errout_locked:
genl_unlock_all();
errout:
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 31045ef44a82..fecefa2dc94e 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -370,8 +370,11 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
skb_orphan(skb);
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
err = nf_ct_frag6_gather(net, skb, user);
- if (err)
+ if (err) {
+ if (err != -EINPROGRESS)
+ kfree_skb(skb);
return err;
+ }
key->ip.proto = ipv6_hdr(skb)->nexthdr;
ovs_cb.mru = IP6CB(skb)->frag_max_size;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d2238b204691..dd2332390c45 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3648,19 +3648,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
switch (val) {
case TPACKET_V1:
case TPACKET_V2:
case TPACKET_V3:
- po->tp_version = val;
- return 0;
+ break;
default:
return -EINVAL;
}
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_version = val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_RESERVE:
{
@@ -4164,6 +4170,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;
+ lock_sock(sk);
/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
net_warn_ratelimited("Tx-ring is not supported.\n");
@@ -4245,7 +4252,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
goto out;
}
- lock_sock(sk);
/* Detach socket from network */
spin_lock(&po->bind_lock);
@@ -4294,11 +4300,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (!tx_ring)
prb_shutdown_retire_blk_timer(po, rb_queue);
}
- release_sock(sk);
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
+ release_sock(sk);
return err;
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index fcddacc92e01..20e2923dc827 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -659,6 +659,8 @@ out_recv:
out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
out_slab:
+ if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
+ pr_warn("could not unregister rds_tcp_dev_notifier\n");
kmem_cache_destroy(rds_tcp_conn_slab);
out:
return ret;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b54d56d4959b..cf9b2fe8eac6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -108,6 +108,17 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind)
kfree(keys);
}
+static bool offset_valid(struct sk_buff *skb, int offset)
+{
+ if (offset > 0 && offset > skb->len)
+ return false;
+
+ if (offset < 0 && -offset > skb_headroom(skb))
+ return false;
+
+ return true;
+}
+
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -134,6 +145,11 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
if (tkey->offmask) {
char *d, _d;
+ if (!offset_valid(skb, off + tkey->at)) {
+ pr_info("tc filter pedit 'at' offset %d out of bounds\n",
+ off + tkey->at);
+ goto bad;
+ }
d = skb_header_pointer(skb, off + tkey->at, 1,
&_d);
if (!d)
@@ -146,10 +162,10 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
" offset must be on 32 bit boundaries\n");
goto bad;
}
- if (offset > 0 && offset > skb->len) {
- pr_info("tc filter pedit"
- " offset %d can't exceed pkt length %d\n",
- offset, skb->len);
+
+ if (!offset_valid(skb, off + offset)) {
+ pr_info("tc filter pedit offset %d out of bounds\n",
+ offset);
goto bad;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2b2a7974e4bb..b05d4a2155b0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -430,7 +430,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
+ if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ n->nlmsg_flags, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index eb219b78cd49..5877f6061b57 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -62,9 +62,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
- if (head == NULL)
- return 0UL;
-
list_for_each_entry(f, &head->flist, link) {
if (f->handle == handle) {
l = (unsigned long) f;
@@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_proto *tp, bool force)
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, basic_delete_filter);
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
return true;
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index bb1d5a487081..0a47ba5e6109 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -292,7 +292,6 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
return true;
}
@@ -303,9 +302,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
struct cls_bpf_prog *prog;
unsigned long ret = 0UL;
- if (head == NULL)
- return 0UL;
-
list_for_each_entry(prog, &head->plist, link) {
if (prog->handle == handle) {
ret = (unsigned long) prog;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 85233c470035..c1f20077837f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -137,11 +137,10 @@ static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)
if (!force)
return false;
-
- if (head) {
- RCU_INIT_POINTER(tp->root, NULL);
+ /* Head can still be NULL due to cls_cgroup_init(). */
+ if (head)
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
- }
+
return true;
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e39672394c7b..6575aba87630 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -596,7 +596,6 @@ static bool flow_destroy(struct tcf_proto *tp, bool force)
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
return true;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f6f40fba599b..904442421db3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rhashtable.h>
+#include <linux/workqueue.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
@@ -64,7 +65,10 @@ struct cls_fl_head {
bool mask_assigned;
struct list_head filters;
struct rhashtable_params ht_params;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
struct cls_fl_filter {
@@ -269,6 +273,24 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
}
+static void fl_destroy_sleepable(struct work_struct *work)
+{
+ struct cls_fl_head *head = container_of(work, struct cls_fl_head,
+ work);
+ if (head->mask_assigned)
+ rhashtable_destroy(&head->ht);
+ kfree(head);
+ module_put(THIS_MODULE);
+}
+
+static void fl_destroy_rcu(struct rcu_head *rcu)
+{
+ struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu);
+
+ INIT_WORK(&head->work, fl_destroy_sleepable);
+ schedule_work(&head->work);
+}
+
static bool fl_destroy(struct tcf_proto *tp, bool force)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -282,10 +304,9 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
- RCU_INIT_POINTER(tp->root, NULL);
- if (head->mask_assigned)
- rhashtable_destroy(&head->ht);
- kfree_rcu(head, rcu);
+
+ __module_get(THIS_MODULE);
+ call_rcu(&head->rcu, fl_destroy_rcu);
return true;
}
@@ -711,8 +732,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (fold) {
- rhashtable_remove_fast(&head->ht, &fold->ht_node,
- head->ht_params);
+ if (!tc_skip_sw(fold->flags))
+ rhashtable_remove_fast(&head->ht, &fold->ht_node,
+ head->ht_params);
fl_hw_destroy_filter(tp, (unsigned long)fold);
}
@@ -739,8 +761,9 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
- rhashtable_remove_fast(&head->ht, &f->ht_node,
- head->ht_params);
+ if (!tc_skip_sw(f->flags))
+ rhashtable_remove_fast(&head->ht, &f->ht_node,
+ head->ht_params);
list_del_rcu(&f->list);
fl_hw_destroy_filter(tp, (unsigned long)f);
tcf_unbind_filter(tp, &f->res);
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 25927b6c4436..f935429bd5ef 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -114,7 +114,6 @@ static bool mall_destroy(struct tcf_proto *tp, bool force)
call_rcu(&f->rcu, mall_destroy_filter);
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
return true;
}
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 4f05a19fb073..322438fb3ffc 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
nhptr = ip_hdr(skb);
#endif
-
+ if (unlikely(!head))
+ return -1;
restart:
#if RSVP_DST_LEN == 4
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 96144bdf30db..0751245a6ace 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -543,7 +543,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force)
walker.fn = tcindex_destroy_element;
tcindex_walk(tp, &walker);
- RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&p->rcu, __tcindex_destroy);
return true;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a2ea1d1cc06a..a01a56ec8b8c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb)
* bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
*/
if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
- if (asoc) {
- sctp_association_put(asoc);
+ if (transport) {
+ sctp_transport_put(transport);
asoc = NULL;
+ transport = NULL;
} else {
sctp_endpoint_put(ep);
ep = NULL;
@@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb)
bh_unlock_sock(sk);
/* Release the asoc/ep ref we took in the lookup calls. */
- if (asoc)
- sctp_association_put(asoc);
+ if (transport)
+ sctp_transport_put(transport);
else
sctp_endpoint_put(ep);
@@ -283,8 +284,8 @@ discard_it:
discard_release:
/* Release the asoc/ep ref we took in the lookup calls. */
- if (asoc)
- sctp_association_put(asoc);
+ if (transport)
+ sctp_transport_put(transport);
else
sctp_endpoint_put(ep);
@@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
struct sctp_inq *inqueue = &chunk->rcvr->inqueue;
+ struct sctp_transport *t = chunk->transport;
struct sctp_ep_common *rcvr = NULL;
int backloged = 0;
@@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
done:
/* Release the refs we took in sctp_add_backlog */
if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
- sctp_association_put(sctp_assoc(rcvr));
+ sctp_transport_put(t);
else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
sctp_endpoint_put(sctp_ep(rcvr));
else
@@ -363,6 +365,7 @@ done:
static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
+ struct sctp_transport *t = chunk->transport;
struct sctp_ep_common *rcvr = chunk->rcvr;
int ret;
@@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
* from us
*/
if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
- sctp_association_hold(sctp_assoc(rcvr));
+ sctp_transport_hold(t);
else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
sctp_endpoint_hold(sctp_ep(rcvr));
else
@@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
return sk;
out:
- sctp_association_put(asoc);
+ sctp_transport_put(transport);
return NULL;
}
/* Common cleanup code for icmp/icmpv6 error handler. */
-void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
+void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
{
bh_unlock_sock(sk);
- sctp_association_put(asoc);
+ sctp_transport_put(t);
}
/*
@@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
}
out_unlock:
- sctp_err_finish(sk, asoc);
+ sctp_err_finish(sk, transport);
}
/*
@@ -952,11 +955,8 @@ static struct sctp_association *__sctp_lookup_association(
goto out;
asoc = t->asoc;
- sctp_association_hold(asoc);
*pt = t;
- sctp_transport_put(t);
-
out:
return asoc;
}
@@ -986,7 +986,7 @@ int sctp_has_association(struct net *net,
struct sctp_transport *transport;
if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
- sctp_association_put(asoc);
+ sctp_transport_put(transport);
return 1;
}
@@ -1021,7 +1021,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
struct sctphdr *sh = sctp_hdr(skb);
union sctp_params params;
sctp_init_chunk_t *init;
- struct sctp_transport *transport;
struct sctp_af *af;
/*
@@ -1052,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
af->from_addr_param(paddr, params.addr, sh->source, 0);
- asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
+ asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
if (asoc)
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f473779e8b1c..176af3080a2b 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
out_unlock:
- sctp_err_finish(sk, asoc);
+ sctp_err_finish(sk, transport);
out:
if (likely(idev != NULL))
in6_dev_put(idev);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9fbb6feb8c27..f23ad913dc7a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
- err = sctp_wait_for_connect(asoc, &timeo);
- if ((err == 0 || err == -EINPROGRESS) && assoc_id)
+ if (assoc_id)
*assoc_id = asoc->assoc_id;
+ err = sctp_wait_for_connect(asoc, &timeo);
+ /* Note: the asoc may be freed after the return of
+ * sctp_wait_for_connect.
+ */
/* Don't free association on exit. */
asoc = NULL;
@@ -4282,19 +4285,18 @@ static void sctp_shutdown(struct sock *sk, int how)
{
struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
- struct sctp_association *asoc;
if (!sctp_style(sk, TCP))
return;
- if (how & SEND_SHUTDOWN) {
+ ep = sctp_sk(sk)->ep;
+ if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
+ struct sctp_association *asoc;
+
sk->sk_state = SCTP_SS_CLOSING;
- ep = sctp_sk(sk)->ep;
- if (!list_empty(&ep->asocs)) {
- asoc = list_entry(ep->asocs.next,
- struct sctp_association, asocs);
- sctp_primitive_SHUTDOWN(net, asoc, NULL);
- }
+ asoc = list_entry(ep->asocs.next,
+ struct sctp_association, asocs);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
}
@@ -4480,12 +4482,9 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
if (!transport || !sctp_transport_hold(transport))
goto out;
- sctp_association_hold(transport->asoc);
- sctp_transport_put(transport);
-
rcu_read_unlock();
err = cb(transport, p);
- sctp_association_put(transport->asoc);
+ sctp_transport_put(transport);
out:
return err;
diff --git a/net/socket.c b/net/socket.c
index 5a9bf5ee2464..73dc69f9681e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -341,8 +341,23 @@ static const struct xattr_handler sockfs_xattr_handler = {
.get = sockfs_xattr_get,
};
+static int sockfs_security_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, const void *value,
+ size_t size, int flags)
+{
+ /* Handled by LSM. */
+ return -EAGAIN;
+}
+
+static const struct xattr_handler sockfs_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .set = sockfs_security_xattr_set,
+};
+
static const struct xattr_handler *sockfs_xattr_handlers[] = {
&sockfs_xattr_handler,
+ &sockfs_security_xattr_handler,
NULL
};
@@ -2038,6 +2053,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (err)
break;
++datagrams;
+ if (msg_data_left(&msg_sys))
+ break;
cond_resched();
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 34dd7b26ee5f..62a482790937 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2753,14 +2753,18 @@ EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
{
+ rcu_read_lock();
xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
+ rcu_read_lock();
rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
xprt);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
@@ -2770,9 +2774,8 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
struct rpc_xprt_switch *xps;
bool ret;
- xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
-
rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
ret = rpc_xprt_switch_has_addr(xps, sap);
rcu_read_unlock();
return ret;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c3f652395a80..3bc1d61694cb 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1002,14 +1002,8 @@ static void svc_age_temp_xprts(unsigned long closure)
void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
{
struct svc_xprt *xprt;
- struct svc_sock *svsk;
- struct socket *sock;
struct list_head *le, *next;
LIST_HEAD(to_be_closed);
- struct linger no_linger = {
- .l_onoff = 1,
- .l_linger = 0,
- };
spin_lock_bh(&serv->sv_lock);
list_for_each_safe(le, next, &serv->sv_tempsocks) {
@@ -1027,10 +1021,7 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
list_del_init(le);
xprt = list_entry(le, struct svc_xprt, xpt_list);
dprintk("svc_age_temp_xprts_now: closing %p\n", xprt);
- svsk = container_of(xprt, struct svc_sock, sk_xprt);
- sock = svsk->sk_sock;
- kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&no_linger, sizeof(no_linger));
+ xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
svc_close_xprt(xprt);
}
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 57625f64efd5..a4bc98265d88 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -438,6 +438,21 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
}
+static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk;
+ struct socket *sock;
+ struct linger no_linger = {
+ .l_onoff = 1,
+ .l_linger = 0,
+ };
+
+ svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ sock = svsk->sk_sock;
+ kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+ (char *)&no_linger, sizeof(no_linger));
+}
+
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
@@ -648,6 +663,10 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
return NULL;
}
+static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt)
+{
+}
+
static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -667,6 +686,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
.xpo_secure_port = svc_sock_secure_port,
+ .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
};
static struct svc_xprt_class svc_udp_class = {
@@ -1242,6 +1262,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
+ .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
};
static struct svc_xprt_class svc_tcp_class = {
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 210949562786..26b26beef2d4 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -44,18 +44,20 @@
* being done.
*
* When the underlying transport disconnects, MRs are left in one of
- * three states:
+ * four states:
*
* INVALID: The MR was not in use before the QP entered ERROR state.
- * (Or, the LOCAL_INV WR has not completed or flushed yet).
- *
- * STALE: The MR was being registered or unregistered when the QP
- * entered ERROR state, and the pending WR was flushed.
*
* VALID: The MR was registered before the QP entered ERROR state.
*
- * When frwr_op_map encounters STALE and VALID MRs, they are recovered
- * with ib_dereg_mr and then are re-initialized. Beause MR recovery
+ * FLUSHED_FR: The MR was being registered when the QP entered ERROR
+ * state, and the pending WR was flushed.
+ *
+ * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
+ * state, and the pending WR was flushed.
+ *
+ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
+ * with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
* recovered MRs are placed back on the rb_mws list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while
@@ -177,12 +179,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
static void
frwr_op_recover_mr(struct rpcrdma_mw *mw)
{
+ enum rpcrdma_frmr_state state = mw->frmr.fr_state;
struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc;
rc = __frwr_reset_mr(ia, mw);
- ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ if (state != FRMR_FLUSHED_LI)
+ ib_dma_unmap_sg(ia->ri_device,
+ mw->mw_sg, mw->mw_nents, mw->mw_dir);
if (rc)
goto out_release;
@@ -262,10 +267,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
}
static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
- const char *wr)
+__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
{
- frmr->fr_state = FRMR_IS_STALE;
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
wr, ib_wc_status_msg(wc->status),
@@ -288,7 +291,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- __frwr_sendcompletion_flush(wc, frmr, "fastreg");
+ frmr->fr_state = FRMR_FLUSHED_FR;
+ __frwr_sendcompletion_flush(wc, "fastreg");
}
}
@@ -308,7 +312,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- __frwr_sendcompletion_flush(wc, frmr, "localinv");
+ frmr->fr_state = FRMR_FLUSHED_LI;
+ __frwr_sendcompletion_flush(wc, "localinv");
}
}
@@ -328,8 +333,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wr_cqe and status are reliable at this point */
cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- if (wc->status != IB_WC_SUCCESS)
- __frwr_sendcompletion_flush(wc, frmr, "localinv");
+ if (wc->status != IB_WC_SUCCESS) {
+ frmr->fr_state = FRMR_FLUSHED_LI;
+ __frwr_sendcompletion_flush(wc, "localinv");
+ }
complete(&frmr->fr_linv_done);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6864fb967038..1334de2715c2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -67,6 +67,7 @@ static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *);
+static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static struct svc_xprt_ops svc_rdma_ops = {
.xpo_create = svc_rdma_create,
@@ -79,6 +80,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
.xpo_secure_port = svc_rdma_secure_port,
+ .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt,
};
struct svc_xprt_class svc_rdma_class = {
@@ -1317,6 +1319,10 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp)
return 1;
}
+static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
+{
+}
+
int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
{
struct ib_send_wr *bad_wr, *n_wr;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 0d35b761c883..6e1bba358203 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -216,7 +216,8 @@ struct rpcrdma_rep {
enum rpcrdma_frmr_state {
FRMR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */
- FRMR_IS_STALE, /* failed completion */
+ FRMR_FLUSHED_FR, /* flushed FASTREG WR */
+ FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
};
struct rpcrdma_frmr {
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 975dbeb60ab0..52d74760fb68 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -421,6 +421,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
dev = dev_get_by_name(net, driver_name);
if (!dev)
return -ENODEV;
+ if (tipc_mtu_bad(dev, 0)) {
+ dev_put(dev);
+ return -EINVAL;
+ }
/* Associate TIPC bearer with L2 bearer */
rcu_assign_pointer(b->media_ptr, dev);
@@ -610,8 +614,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
if (!b)
return NOTIFY_DONE;
- b->mtu = dev->mtu;
-
switch (evt) {
case NETDEV_CHANGE:
if (netif_carrier_ok(dev))
@@ -624,6 +626,11 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
tipc_reset_bearer(net, b);
break;
case NETDEV_CHANGEMTU:
+ if (tipc_mtu_bad(dev, 0)) {
+ bearer_disable(net, b);
+ break;
+ }
+ b->mtu = dev->mtu;
tipc_reset_bearer(net, b);
break;
case NETDEV_CHANGEADDR:
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 78892e2f53e3..278ff7f616f9 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -39,6 +39,7 @@
#include "netlink.h"
#include "core.h"
+#include "msg.h"
#include <net/genetlink.h>
#define MAX_MEDIA 3
@@ -59,6 +60,9 @@
#define TIPC_MEDIA_TYPE_IB 2
#define TIPC_MEDIA_TYPE_UDP 3
+/* minimum bearer MTU */
+#define TIPC_MIN_BEARER_MTU (MAX_H_SIZE + INT_H_SIZE)
+
/**
* struct tipc_media_addr - destination address used by TIPC bearers
* @value: address info (format defined by media)
@@ -215,4 +219,13 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq);
+/* check if device MTU is too low for tipc headers */
+static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve)
+{
+ if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve)
+ return false;
+ netdev_warn(dev, "MTU too low for tipc bearer\n");
+ return true;
+}
+
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1055164c6232..bda89bf9f4ff 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -47,8 +47,8 @@
#include <linux/pkt_sched.h>
struct tipc_stats {
- u32 sent_info; /* used in counting # sent packets */
- u32 recv_info; /* used in counting # recv'd packets */
+ u32 sent_pkts;
+ u32 recv_pkts;
u32 sent_states;
u32 recv_states;
u32 sent_probes;
@@ -857,7 +857,6 @@ void tipc_link_reset(struct tipc_link *l)
l->acked = 0;
l->silent_intv_cnt = 0;
l->rst_cnt = 0;
- l->stats.recv_info = 0;
l->stale_count = 0;
l->bc_peer_is_up = false;
memset(&l->mon_state, 0, sizeof(l->mon_state));
@@ -888,6 +887,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *transmq = &l->transmq;
struct sk_buff_head *backlogq = &l->backlogq;
struct sk_buff *skb, *_skb, *bskb;
+ int pkt_cnt = skb_queue_len(list);
/* Match msg importance against this and all higher backlog limits: */
if (!skb_queue_empty(backlogq)) {
@@ -901,6 +901,11 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
return -EMSGSIZE;
}
+ if (pkt_cnt > 1) {
+ l->stats.sent_fragmented++;
+ l->stats.sent_fragments += pkt_cnt;
+ }
+
/* Prepare each packet for sending, and add to relevant queue: */
while (skb_queue_len(list)) {
skb = skb_peek(list);
@@ -920,6 +925,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
l->rcv_unacked = 0;
+ l->stats.sent_pkts++;
seqno++;
continue;
}
@@ -968,6 +974,7 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
msg_set_ack(hdr, ack);
msg_set_bcast_ack(hdr, bc_ack);
l->rcv_unacked = 0;
+ l->stats.sent_pkts++;
seqno++;
}
l->snd_nxt = seqno;
@@ -1260,7 +1267,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Deliver packet */
l->rcv_nxt++;
- l->stats.recv_info++;
+ l->stats.recv_pkts++;
if (!tipc_data_input(l, skb, l->inputq))
rc |= tipc_link_input(l, skb, l->inputq);
if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
@@ -1492,8 +1499,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
l->tolerance = peers_tol;
- if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI,
- TIPC_MAX_LINK_PRI)) {
+ /* Update own prio if peer indicates a different value */
+ if ((peers_prio != l->priority) &&
+ in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
l->priority = peers_prio;
rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
@@ -1799,10 +1807,6 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
void tipc_link_reset_stats(struct tipc_link *l)
{
memset(&l->stats, 0, sizeof(l->stats));
- if (!link_is_bc_sndlink(l)) {
- l->stats.sent_info = l->snd_nxt;
- l->stats.recv_info = l->rcv_nxt;
- }
}
static void link_print(struct tipc_link *l, const char *str)
@@ -1866,12 +1870,12 @@ static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s)
};
struct nla_map map[] = {
- {TIPC_NLA_STATS_RX_INFO, s->recv_info},
+ {TIPC_NLA_STATS_RX_INFO, 0},
{TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments},
{TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented},
{TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles},
{TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled},
- {TIPC_NLA_STATS_TX_INFO, s->sent_info},
+ {TIPC_NLA_STATS_TX_INFO, 0},
{TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments},
{TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented},
{TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles},
@@ -1946,9 +1950,9 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
goto attr_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->rcv_nxt))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->stats.recv_pkts))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->snd_nxt))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->stats.sent_pkts))
goto attr_msg_full;
if (tipc_link_is_up(link))
@@ -2003,12 +2007,12 @@ static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb,
};
struct nla_map map[] = {
- {TIPC_NLA_STATS_RX_INFO, stats->recv_info},
+ {TIPC_NLA_STATS_RX_INFO, stats->recv_pkts},
{TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments},
{TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented},
{TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles},
{TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled},
- {TIPC_NLA_STATS_TX_INFO, stats->sent_info},
+ {TIPC_NLA_STATS_TX_INFO, stats->sent_pkts},
{TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments},
{TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented},
{TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles},
@@ -2075,9 +2079,9 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
goto attr_msg_full;
if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, 0))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0))
goto attr_msg_full;
prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index ed97a5876ebe..9e109bb1a207 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -455,14 +455,14 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
int i, applied_bef;
state->probing = false;
- if (!dlen)
- return;
/* Sanity check received domain record */
- if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
- pr_warn_ratelimited("Received illegal domain record\n");
+ if (dlen < dom_rec_len(arrv_dom, 0))
+ return;
+ if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
+ return;
+ if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen)
return;
- }
/* Synch generation numbers with peer if link just came up */
if (!state->synched) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f9f5f3c3dab5..41f013888f07 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012-2015, Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -129,54 +129,8 @@ static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
static struct proto tipc_proto;
-
static const struct rhashtable_params tsk_rht_params;
-/*
- * Revised TIPC socket locking policy:
- *
- * Most socket operations take the standard socket lock when they start
- * and hold it until they finish (or until they need to sleep). Acquiring
- * this lock grants the owner exclusive access to the fields of the socket
- * data structures, with the exception of the backlog queue. A few socket
- * operations can be done without taking the socket lock because they only
- * read socket information that never changes during the life of the socket.
- *
- * Socket operations may acquire the lock for the associated TIPC port if they
- * need to perform an operation on the port. If any routine needs to acquire
- * both the socket lock and the port lock it must take the socket lock first
- * to avoid the risk of deadlock.
- *
- * The dispatcher handling incoming messages cannot grab the socket lock in
- * the standard fashion, since invoked it runs at the BH level and cannot block.
- * Instead, it checks to see if the socket lock is currently owned by someone,
- * and either handles the message itself or adds it to the socket's backlog
- * queue; in the latter case the queued message is processed once the process
- * owning the socket lock releases it.
- *
- * NOTE: Releasing the socket lock while an operation is sleeping overcomes
- * the problem of a blocked socket operation preventing any other operations
- * from occurring. However, applications must be careful if they have
- * multiple threads trying to send (or receive) on the same socket, as these
- * operations might interfere with each other. For example, doing a connect
- * and a receive at the same time might allow the receive to consume the
- * ACK message meant for the connect. While additional work could be done
- * to try and overcome this, it doesn't seem to be worthwhile at the present.
- *
- * NOTE: Releasing the socket lock while an operation is sleeping also ensures
- * that another operation that must be performed in a non-blocking manner is
- * not delayed for very long because the lock has already been taken.
- *
- * NOTE: This code assumes that certain fields of a port/socket pair are
- * constant over its lifetime; such fields can be examined without taking
- * the socket lock and/or port lock, and do not need to be re-read even
- * after resuming processing after waiting. These fields include:
- * - socket type
- * - pointer to socket sk structure (aka tipc_sock structure)
- * - pointer to port structure
- * - port reference
- */
-
static u32 tsk_own_node(struct tipc_sock *tsk)
{
return msg_prevnode(&tsk->phdr);
@@ -232,7 +186,7 @@ static struct tipc_sock *tipc_sk(const struct sock *sk)
static bool tsk_conn_cong(struct tipc_sock *tsk)
{
- return tsk->snt_unacked >= tsk->snd_win;
+ return tsk->snt_unacked > tsk->snd_win;
}
/* tsk_blocks(): translate a buffer size in bytes to number of
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 78cab9c5a445..b58dc95f3d35 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -697,6 +697,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
udp_conf.use_udp_checksums = false;
ub->ifindex = dev->ifindex;
+ if (tipc_mtu_bad(dev, sizeof(struct iphdr) +
+ sizeof(struct udphdr))) {
+ err = -EINVAL;
+ goto err;
+ }
b->mtu = dev->mtu - sizeof(struct iphdr)
- sizeof(struct udphdr);
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 145082e2ba36..2358f2690ec5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2199,7 +2199,8 @@ out:
* Sleep until more data has arrived. But check for races..
*/
static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last, unsigned int last_len)
+ struct sk_buff *last, unsigned int last_len,
+ bool freezable)
{
struct sk_buff *tail;
DEFINE_WAIT(wait);
@@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
- timeo = freezable_schedule_timeout(timeo);
+ if (freezable)
+ timeo = freezable_schedule_timeout(timeo);
+ else
+ timeo = schedule_timeout(timeo);
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD))
@@ -2250,7 +2254,8 @@ struct unix_stream_read_state {
unsigned int splice_flags;
};
-static int unix_stream_read_generic(struct unix_stream_read_state *state)
+static int unix_stream_read_generic(struct unix_stream_read_state *state,
+ bool freezable)
{
struct scm_cookie scm;
struct socket *sock = state->socket;
@@ -2330,7 +2335,7 @@ again:
mutex_unlock(&u->iolock);
timeo = unix_stream_data_wait(sk, timeo, last,
- last_len);
+ last_len, freezable);
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
@@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
.flags = flags
};
- return unix_stream_read_generic(&state);
+ return unix_stream_read_generic(&state, true);
}
static int unix_stream_splice_actor(struct sk_buff *skb,
@@ -2503,7 +2508,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
flags & SPLICE_F_NONBLOCK)
state.flags = MSG_DONTWAIT;
- return unix_stream_read_generic(&state);
+ return unix_stream_read_generic(&state, false);
}
static int unix_shutdown(struct socket *sock, int mode)
@@ -2812,7 +2817,8 @@ static int unix_seq_show(struct seq_file *seq, void *v)
i++;
}
for ( ; i < len; i++)
- seq_putc(seq, u->addr->name->sun_path[i]);
+ seq_putc(seq, u->addr->name->sun_path[i] ?:
+ '@');
}
unix_state_unlock(s);
seq_putc(seq, '\n');
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 08d2e948c9ad..f0c0c8a48c92 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -71,6 +71,7 @@ struct cfg80211_registered_device {
struct list_head bss_list;
struct rb_root bss_tree;
u32 bss_generation;
+ u32 bss_entries;
struct cfg80211_scan_request *scan_req; /* protected by RTNL */
struct sk_buff *scan_msg;
struct cfg80211_sched_scan_request __rcu *sched_scan_req;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index b5bd58d0f731..35ad69fd0838 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -57,6 +57,19 @@
* also linked into the probe response struct.
*/
+/*
+ * Limit the number of BSS entries stored in mac80211. Each one is
+ * a bit over 4k at most, so this limits to roughly 4-5M of memory.
+ * If somebody wants to really attack this though, they'd likely
+ * use small beacons, and only one type of frame, limiting each of
+ * the entries to a much smaller size (in order to generate more
+ * entries in total, so overhead is bigger.)
+ */
+static int bss_entries_limit = 1000;
+module_param(bss_entries_limit, int, 0644);
+MODULE_PARM_DESC(bss_entries_limit,
+ "limit to number of scan BSS entries (per wiphy, default 1000)");
+
#define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ)
static void bss_free(struct cfg80211_internal_bss *bss)
@@ -137,6 +150,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
list_del_init(&bss->list);
rb_erase(&bss->rbn, &rdev->bss_tree);
+ rdev->bss_entries--;
+ WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list),
+ "rdev bss entries[%d]/list[empty:%d] corruption\n",
+ rdev->bss_entries, list_empty(&rdev->bss_list));
bss_ref_put(rdev, bss);
return true;
}
@@ -163,6 +180,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev,
rdev->bss_generation++;
}
+static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev)
+{
+ struct cfg80211_internal_bss *bss, *oldest = NULL;
+ bool ret;
+
+ lockdep_assert_held(&rdev->bss_lock);
+
+ list_for_each_entry(bss, &rdev->bss_list, list) {
+ if (atomic_read(&bss->hold))
+ continue;
+
+ if (!list_empty(&bss->hidden_list) &&
+ !bss->pub.hidden_beacon_bss)
+ continue;
+
+ if (oldest && time_before(oldest->ts, bss->ts))
+ continue;
+ oldest = bss;
+ }
+
+ if (WARN_ON(!oldest))
+ return false;
+
+ /*
+ * The callers make sure to increase rdev->bss_generation if anything
+ * gets removed (and a new entry added), so there's no need to also do
+ * it here.
+ */
+
+ ret = __cfg80211_unlink_bss(rdev, oldest);
+ WARN_ON(!ret);
+ return ret;
+}
+
void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
bool send_message)
{
@@ -689,6 +740,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
const u8 *ie;
int i, ssidlen;
u8 fold = 0;
+ u32 n_entries = 0;
ies = rcu_access_pointer(new->pub.beacon_ies);
if (WARN_ON(!ies))
@@ -712,6 +764,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
/* This is the bad part ... */
list_for_each_entry(bss, &rdev->bss_list, list) {
+ /*
+ * we're iterating all the entries anyway, so take the
+ * opportunity to validate the list length accounting
+ */
+ n_entries++;
+
if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid))
continue;
if (bss->pub.channel != new->pub.channel)
@@ -740,6 +798,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
new->pub.beacon_ies);
}
+ WARN_ONCE(n_entries != rdev->bss_entries,
+ "rdev bss entries[%d]/list[len:%d] corruption\n",
+ rdev->bss_entries, n_entries);
+
return true;
}
@@ -894,7 +956,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
}
}
+ if (rdev->bss_entries >= bss_entries_limit &&
+ !cfg80211_bss_expire_oldest(rdev)) {
+ kfree(new);
+ goto drop;
+ }
+
list_add_tail(&new->list, &rdev->bss_list);
+ rdev->bss_entries++;
rb_insert_bss(rdev, new);
found = new;
}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 5ea12afc7706..659b507b347d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1158,7 +1158,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
58500000,
65000000,
78000000,
- 0,
+ /* not in the spec, but some devices use this: */
+ 86500000,
},
{ 13500000,
27000000,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index fd6986634e6f..5bf7e1bfeac7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1268,12 +1268,14 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
err = security_xfrm_policy_lookup(pol->security,
fl->flowi_secid,
policy_to_flow_dir(dir));
- if (!err && !xfrm_pol_hold_rcu(pol))
- goto again;
- else if (err == -ESRCH)
+ if (!err) {
+ if (!xfrm_pol_hold_rcu(pol))
+ goto again;
+ } else if (err == -ESRCH) {
pol = NULL;
- else
+ } else {
pol = ERR_PTR(err);
+ }
} else
pol = NULL;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 08892091cfe3..671a1d0333f0 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2450,7 +2450,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
#endif
type = nlh->nlmsg_type;