diff options
Diffstat (limited to 'net')
122 files changed, 1147 insertions, 544 deletions
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e034afbd1bb0..08ce36147c4c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -652,6 +652,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_softif_destroy_sysfs(hard_iface->soft_iface); } + hard_iface->soft_iface = NULL; batadv_hardif_put(hard_iface); out: diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 2333777f919d..8af1611b8ab2 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -837,6 +837,7 @@ static int batadv_tp_send(void *arg) primary_if = batadv_primary_if_get_selected(bat_priv); if (unlikely(!primary_if)) { err = BATADV_TP_REASON_DST_UNREACHABLE; + tp_vars->reason = err; goto out; } diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index d020299baba4..1904a93f47d5 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -1090,7 +1090,6 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, { struct hci_conn *hcon; struct hci_dev *hdev; - bdaddr_t *src = BDADDR_ANY; int n; n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", @@ -1101,7 +1100,8 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, if (n < 7) return -EINVAL; - hdev = hci_get_route(addr, src); + /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ + hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); if (!hdev) return -ENOENT; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3809617aa98d..dc59eae54717 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -613,7 +613,7 @@ int hci_conn_del(struct hci_conn *conn) return 0; } -struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) +struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) { int use_src = bacmp(src, BDADDR_ANY); struct hci_dev *hdev = NULL, *d; @@ -634,7 +634,29 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) */ if (use_src) { - if (!bacmp(&d->bdaddr, src)) { + bdaddr_t id_addr; + u8 id_addr_type; + + if (src_type == BDADDR_BREDR) { + if (!lmp_bredr_capable(d)) + continue; + bacpy(&id_addr, &d->bdaddr); + id_addr_type = BDADDR_BREDR; + } else { + if (!lmp_le_capable(d)) + continue; + + hci_copy_identity_address(d, &id_addr, + &id_addr_type); + + /* Convert from HCI to three-value type */ + if (id_addr_type == ADDR_LE_DEV_PUBLIC) + id_addr_type = BDADDR_LE_PUBLIC; + else + id_addr_type = BDADDR_LE_RANDOM; + } + + if (!bacmp(&id_addr, src) && id_addr_type == src_type) { hdev = d; break; } } else { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d4cad29b033f..577f1c01454a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7060,7 +7060,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst, dst_type, __le16_to_cpu(psm)); - hdev = hci_get_route(dst, &chan->src); + hdev = hci_get_route(dst, &chan->src, chan->src_type); if (!hdev) return -EHOSTUNREACH; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 8e385a0ae60e..2f2cb5e27cdd 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -178,7 +178,7 @@ static void rfcomm_reparent_device(struct rfcomm_dev *dev) struct hci_dev *hdev; struct hci_conn *conn; - hdev = hci_get_route(&dev->dst, &dev->src); + hdev = hci_get_route(&dev->dst, &dev->src, BDADDR_BREDR); if (!hdev) return; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f52bcbf2e58c..3125ce670c2f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -219,7 +219,7 @@ static int sco_connect(struct sock *sk) BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); - hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src); + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); if (!hdev) return -EHOSTUNREACH; diff --git a/net/can/bcm.c b/net/can/bcm.c index 8e999ffdf28b..436a7537e6a9 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -77,7 +77,7 @@ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -#define CAN_BCM_VERSION "20160617" +#define CAN_BCM_VERSION "20161123" MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); @@ -109,8 +109,9 @@ struct bcm_op { u32 count; u32 nframes; u32 currframe; - struct canfd_frame *frames; - struct canfd_frame *last_frames; + /* void pointers to arrays of struct can[fd]_frame */ + void *frames; + void *last_frames; struct canfd_frame sframe; struct canfd_frame last_sframe; struct sock *sk; @@ -681,7 +682,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) if (op->flags & RX_FILTER_ID) { /* the easiest case */ - bcm_rx_update_and_send(op, &op->last_frames[0], rxframe); + bcm_rx_update_and_send(op, op->last_frames, rxframe); goto rx_starttimer; } @@ -1068,7 +1069,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (msg_head->nframes) { /* update CAN frames content */ - err = memcpy_from_msg((u8 *)op->frames, msg, + err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) return err; @@ -1118,7 +1119,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } if (msg_head->nframes) { - err = memcpy_from_msg((u8 *)op->frames, msg, + err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) { if (op->frames != &op->sframe) @@ -1163,6 +1164,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, /* check flags */ if (op->flags & RX_RTR_FRAME) { + struct canfd_frame *frame0 = op->frames; /* no timers in RTR-mode */ hrtimer_cancel(&op->thrtimer); @@ -1174,8 +1176,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, * prevent a full-load-loopback-test ... ;-] */ if ((op->flags & TX_CP_CAN_ID) || - (op->frames[0].can_id == op->can_id)) - op->frames[0].can_id = op->can_id & ~CAN_RTR_FLAG; + (frame0->can_id == op->can_id)) + frame0->can_id = op->can_id & ~CAN_RTR_FLAG; } else { if (op->flags & SETTIMER) { @@ -1549,24 +1551,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); + int ret = 0; if (len < sizeof(*addr)) return -EINVAL; - if (bo->bound) - return -EISCONN; + lock_sock(sk); + + if (bo->bound) { + ret = -EISCONN; + goto fail; + } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(&init_net, addr->can_ifindex); - if (!dev) - return -ENODEV; - + if (!dev) { + ret = -ENODEV; + goto fail; + } if (dev->type != ARPHRD_CAN) { dev_put(dev); - return -ENODEV; + ret = -ENODEV; + goto fail; } bo->ifindex = dev->ifindex; @@ -1577,17 +1586,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, bo->ifindex = 0; } - bo->bound = 1; - if (proc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); + if (!bo->bcm_proc_read) { + ret = -ENOMEM; + goto fail; + } } - return 0; + bo->bound = 1; + +fail: + release_sock(sk); + + return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index 7d54e944de5e..dcbe67ff3e2b 100644 --- a/net/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c @@ -34,7 +34,8 @@ void ceph_file_layout_from_legacy(struct ceph_file_layout *fl, fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count); fl->object_size = le32_to_cpu(legacy->fl_object_size); fl->pool_id = le32_to_cpu(legacy->fl_pg_pool); - if (fl->pool_id == 0) + if (fl->pool_id == 0 && fl->stripe_unit == 0 && + fl->stripe_count == 0 && fl->object_size == 0) fl->pool_id = -1; } EXPORT_SYMBOL(ceph_file_layout_from_legacy); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d9bf7a1d0a58..e6ae15bc41b7 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -4094,6 +4094,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) osd_init(&osdc->homeless_osd); osdc->homeless_osd.o_osdc = osdc; osdc->homeless_osd.o_osd = CEPH_HOMELESS_OSD; + osdc->last_linger_id = CEPH_LINGER_ID_START; osdc->linger_requests = RB_ROOT; osdc->map_checks = RB_ROOT; osdc->linger_map_checks = RB_ROOT; diff --git a/net/core/dev.c b/net/core/dev.c index 820bac239738..6666b28b6815 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_orphan_frags(skb, GFP_ATOMIC) || - unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } + int ret = ____dev_forward_skb(dev, skb); - skb_scrub_packet(skb, true); - skb->priority = 0; - skb->protocol = eth_type_trans(skb, dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + if (likely(!ret)) { + skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } - return 0; + return ret; } EXPORT_SYMBOL_GPL(__dev_forward_skb); @@ -2484,7 +2479,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out; } - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 977489820eb9..047a1752ece1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2479,6 +2479,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GET_TS_INFO: case ETHTOOL_GEEE: case ETHTOOL_GTUNABLE: + case ETHTOOL_GLINKSETTINGS: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..b391209838ef 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) return dev_forward_skb(dev, skb); } +static inline int __bpf_rx_skb_no_mac(struct net_device *dev, + struct sk_buff *skb) +{ + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->dev = dev; + ret = netif_rx(skb); + } + + return ret; +} + static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + /* skb->mac_len is not set on normal egress */ + unsigned int mlen = skb->network_header - skb->mac_header; + + __skb_pull(skb, mlen); + + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + skb_pop_mac_header(skb); + skb_reset_mac_len(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + bpf_push_mac_rcsum(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return __bpf_redirect_no_mac(skb, dev, flags); + default: + return __bpf_redirect_common(skb, dev, flags); + } +} + BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; @@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) return -ENOMEM; } - bpf_push_mac_rcsum(clone); - - return flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); + return __bpf_redirect(clone, dev, flags); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - bpf_push_mac_rcsum(skb); - - return ri->flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + return __bpf_redirect(skb, dev, ri->flags); } static const struct bpf_func_proto bpf_redirect_proto = { diff --git a/net/core/flow.c b/net/core/flow.c index 3937b1b68d5b..18e8893d4be5 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -95,7 +95,6 @@ static void flow_cache_gc_task(struct work_struct *work) list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { flow_entry_kill(fce, xfrm); atomic_dec(&xfrm->flow_cache_gc_count); - WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0); } } @@ -236,9 +235,8 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_count > fc->high_watermark) flow_cache_shrink(fc, fcp); - if (fcp->hash_count > 2 * fc->high_watermark || - atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) { - atomic_inc(&net->xfrm.flow_cache_genid); + if (atomic_read(&net->xfrm.flow_cache_gc_count) > + 2 * num_online_cpus() * fc->high_watermark) { flo = ERR_PTR(-ENOBUFS); goto ret_object; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ab193e5def07..c6d8207ffa7e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; - bool ret = false; + bool ret; if (!data) { data = skb->data; @@ -549,12 +549,17 @@ ip_proto_again: out_good: ret = true; -out_bad: + key_control->thoff = (u16)nhoff; +out: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_control->thoff = (u16)nhoff; return ret; + +out_bad: + ret = false; + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; } EXPORT_SYMBOL(__skb_flow_dissect); @@ -1008,4 +1013,4 @@ static int __init init_default_flow_dissectors(void) return 0; } -late_initcall_sync(init_default_flow_dissectors); +core_initcall(init_default_flow_dissectors); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f61c0e02a413..7001da910c6b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -219,6 +219,8 @@ int peernet2id_alloc(struct net *net, struct net *peer) bool alloc; int id; + if (atomic_read(&net->count) == 0) + return NETNSA_NSID_NOT_ASSIGNED; spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fb7348f13501..a6196cf844f6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype) rtnl_msg_handlers[protocol][msgindex].doit = NULL; rtnl_msg_handlers[protocol][msgindex].dumpit = NULL; + rtnl_msg_handlers[protocol][msgindex].calcit = NULL; return 0; } @@ -839,18 +840,20 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, if (dev->dev.parent && dev_is_pci(dev->dev.parent) && (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); - size_t size = nla_total_size(sizeof(struct nlattr)); - size += nla_total_size(num_vfs * sizeof(struct nlattr)); + size_t size = nla_total_size(0); size += num_vfs * - (nla_total_size(sizeof(struct ifla_vf_mac)) + - nla_total_size(MAX_VLAN_LIST_LEN * - sizeof(struct nlattr)) + + (nla_total_size(0) + + nla_total_size(sizeof(struct ifla_vf_mac)) + + nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */ nla_total_size(MAX_VLAN_LIST_LEN * sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + + nla_total_size(0) + /* nest IFLA_VF_STATS */ /* IFLA_VF_STATS_RX_PACKETS */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_PACKETS */ @@ -898,7 +901,8 @@ static size_t rtnl_port_size(const struct net_device *dev, static size_t rtnl_xdp_size(const struct net_device *dev) { - size_t xdp_size = nla_total_size(1); /* XDP_ATTACHED */ + size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ + nla_total_size(1); /* XDP_ATTACHED */ if (!dev->netdev_ops->ndo_xdp) return 0; @@ -927,8 +931,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ - + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */ - + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */ + + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1605,7 +1609,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) head = &net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (link_dump_filtered(dev, master_idx, kind_ops)) - continue; + goto cont; if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -2733,7 +2737,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) ext_filter_mask)); } - return min_ifinfo_dump_size; + return nlmsg_total_size(min_ifinfo_dump_size); } static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) @@ -2848,7 +2852,10 @@ nla_put_failure: static inline size_t rtnl_fdb_nlmsg_size(void) { - return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(sizeof(u16)) + /* NDA_VLAN */ + 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, diff --git a/net/core/sock.c b/net/core/sock.c index c73e28fc9c2a..00a074dbfe9b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(sock_queue_rcv_skb); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested, unsigned int trim_cap) + const int nested, unsigned int trim_cap, bool refcounted) { int rc = NET_RX_SUCCESS; @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, bh_unlock_sock(sk); out: - sock_put(sk); + if (refcounted) + sock_put(sk); return rc; discard_and_relse: kfree_skb(skb); @@ -714,7 +715,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); + sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -750,7 +751,7 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); + sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_RCVBUFFORCE: @@ -1543,6 +1544,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); newsk->sk_err = 0; + newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 345a3aeb8c7e..edbe59d203ef 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, @@ -698,6 +700,7 @@ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; unsigned int cscov; + u8 dccph_doff; if (skb->pkt_type != PACKET_HOST) return 1; @@ -719,18 +722,19 @@ int dccp_invalid_packet(struct sk_buff *skb) /* * If P.Data Offset is too small for packet type, drop packet and return */ - if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); + dccph_doff = dh->dccph_doff; + if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { + DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff); return 1; } /* * If P.Data Offset is too too large for packet, drop packet and return */ - if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); + if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) { + DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff); return 1; } - + dh = dccp_hdr(skb); /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return @@ -868,7 +872,7 @@ lookup: goto discard_and_relse; nf_reset(skb); - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3828f94b234c..715e5d1dc107 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmpv6_notify()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, @@ -738,7 +739,8 @@ lookup: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, + refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) @@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), + .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 41e65804ddf5..9fe25bf63296 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout) __kfree_skb(skb); } + /* If socket has been already reset kill it. */ + if (sk->sk_state == DCCP_CLOSED) + goto adjudge_to_death; + if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index a6902c1e2f28..7899919cd9f0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -233,6 +233,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, genphy_read_status(phydev); if (ds->ops->adjust_link) ds->ops->adjust_link(ds, port, phydev); + + put_device(&phydev->mdio.dev); } return 0; @@ -504,15 +506,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, void dsa_cpu_dsa_destroy(struct device_node *port_dn) { - struct phy_device *phydev; - - if (of_phy_is_fixed_link(port_dn)) { - phydev = of_phy_find_device(port_dn); - if (phydev) { - phy_device_free(phydev); - fixed_phy_unregister(phydev); - } - } + if (of_phy_is_fixed_link(port_dn)) + of_phy_deregister_fixed_link(port_dn); } static void dsa_switch_destroy(struct dsa_switch *ds) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f8a7d9aab437..5fff951a0a49 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -28,8 +28,10 @@ static struct dsa_switch_tree *dsa_get_dst(u32 tree) struct dsa_switch_tree *dst; list_for_each_entry(dst, &dsa_switch_trees, list) - if (dst->tree == tree) + if (dst->tree == tree) { + kref_get(&dst->refcount); return dst; + } return NULL; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6b1282c006b1..30e2e21d7619 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1125,7 +1125,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, p->phy_interface = mode; phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); - if (of_phy_is_fixed_link(port_dn)) { + if (!phy_dn && of_phy_is_fixed_link(port_dn)) { /* In the case of a fixed PHY, the DT node associated * to the fixed PHY is the Port DT node */ @@ -1135,7 +1135,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, return ret; } phy_is_fixed = true; - phy_dn = port_dn; + phy_dn = of_node_get(port_dn); } if (ds->ops->get_phy_flags) @@ -1154,6 +1154,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, ret = dsa_slave_phy_connect(p, slave_dev, phy_id); if (ret) { netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret); + of_node_put(phy_dn); return ret; } } else { @@ -1162,6 +1163,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_flags, p->phy_interface); } + + of_node_put(phy_dn); } if (p->phy && phy_is_fixed) @@ -1174,6 +1177,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, ret = dsa_slave_phy_connect(p, slave_dev, p->port); if (ret) { netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret); + if (phy_is_fixed) + of_phy_deregister_fixed_link(port_dn); return ret; } } @@ -1289,10 +1294,18 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, void dsa_slave_destroy(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + struct dsa_switch *ds = p->parent; + struct device_node *port_dn; + + port_dn = ds->ports[p->port].dn; netif_carrier_off(slave_dev); - if (p->phy) + if (p->phy) { phy_disconnect(p->phy); + + if (of_phy_is_fixed_link(port_dn)) + of_phy_deregister_fixed_link(port_dn); + } unregister_netdev(slave_dev); free_netdev(slave_dev); } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 300b06888fdf..b54b3ca939db 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -715,6 +715,7 @@ config DEFAULT_TCP_CONG default "reno" if DEFAULT_RENO default "dctcp" if DEFAULT_DCTCP default "cdg" if DEFAULT_CDG + default "bbr" if DEFAULT_BBR default "cubic" config TCP_MD5SIG diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9648c97e541f..215143246e4b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -533,9 +533,9 @@ EXPORT_SYMBOL(inet_dgram_connect); static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ @@ -545,13 +545,12 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) */ while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; return timeo; } @@ -1234,7 +1233,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ - if (fixedid && !(iph->frag_off & htons(IP_DF))) + if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) goto out; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d95631d09248..20fb25e3027b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -476,7 +476,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3b80478226e..161fc0f0d752 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -151,7 +151,7 @@ static void fib_replace_table(struct net *net, struct fib_table *old, int fib_unmerge(struct net *net) { - struct fib_table *old, *new; + struct fib_table *old, *new, *main_table; /* attempt to fetch local table if it has been allocated */ old = fib_get_table(net, RT_TABLE_LOCAL); @@ -162,11 +162,21 @@ int fib_unmerge(struct net *net) if (!new) return -ENOMEM; + /* table is already unmerged */ + if (new == old) + return 0; + /* replace merged table with clean table */ - if (new != old) { - fib_replace_table(net, old, new); - fib_free_table(old); - } + fib_replace_table(net, old, new); + fib_free_table(old); + + /* attempt to fetch main table if it has been allocated */ + main_table = fib_get_table(net, RT_TABLE_MAIN); + if (!main_table) + return 0; + + /* flush local entries from main table */ + fib_table_flush_external(main_table); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31cef3602585..026f309c51e9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1743,8 +1743,10 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) local_l = fib_find_node(lt, &local_tp, l->key); if (fib_insert_alias(lt, local_tp, local_l, new_fa, - NULL, l->key)) + NULL, l->key)) { + kmem_cache_free(fn_alias_kmem, new_fa); goto out; + } } /* stop loop if key wrapped back to 0 */ @@ -1760,6 +1762,71 @@ out: return NULL; } +/* Caller must hold RTNL */ +void fib_table_flush_external(struct fib_table *tb) +{ + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; + + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; + + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; + + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); + + continue; + } + + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; + + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; + + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + /* if alias was cloned to local then we just + * need to remove the local copy from main + */ + if (tb->tb_id != fa->tb_id) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + continue; + } + + /* record local slen */ + slen = fa->fa_slen; + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } + } +} + /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb) { @@ -2413,22 +2480,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, struct key_vector *l, **tp = &iter->tnode; t_key key; - /* use cache location of next-to-find key */ + /* use cached location of previously found key */ if (iter->pos > 0 && pos >= iter->pos) { - pos -= iter->pos; key = iter->key; } else { - iter->pos = 0; + iter->pos = 1; key = 0; } - while ((l = leaf_walk_rcu(tp, key)) != NULL) { + pos -= iter->pos; + + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { key = l->key + 1; iter->pos++; - - if (--pos <= 0) - break; - l = NULL; /* handle unlikely case of a key wrap */ @@ -2437,7 +2501,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, } if (l) - iter->key = key; /* remember it */ + iter->key = l->key; /* remember it */ else iter->pos = 0; /* forget it */ @@ -2465,7 +2529,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) return fib_route_get_idx(iter, *pos); iter->pos = 0; - iter->key = 0; + iter->key = KEY_MAX; return SEQ_START_TOKEN; } @@ -2474,7 +2538,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct key_vector *l = NULL; - t_key key = iter->key; + t_key key = iter->key + 1; ++*pos; @@ -2483,7 +2547,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) l = leaf_walk_rcu(&iter->tnode, key); if (l) { - iter->key = l->key + 1; + iter->key = l->key; iter->pos++; } else { iter->pos = 0; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 38abe70e595f..48734ee6293f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); + fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key_hash(net, fl4, @@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - if (inet_addr_type_dev_table(net, skb_in->dev, + if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev, fl4_dec.saddr) == RTN_LOCAL) { rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 606cc3e85d2b..15db786d50ed 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -162,7 +162,7 @@ static int unsolicited_report_interval(struct in_device *in_dev) } static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); -static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); +static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); @@ -1130,10 +1130,15 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_unlock_bh(&in_dev->mc_tomb_lock); } -static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) +/* + * restore ip_mc_list deleted records + */ +static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list *pmc, *pmc_prev; - struct ip_sf_list *psf, *psf_next; + struct ip_sf_list *psf; + struct net *net = dev_net(in_dev->dev); + __be32 multiaddr = im->multiaddr; spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; @@ -1149,16 +1154,26 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) in_dev->mc_tomb = pmc->next; } spin_unlock_bh(&in_dev->mc_tomb_lock); + + spin_lock_bh(&im->lock); if (pmc) { - for (psf = pmc->tomb; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); + im->interface = pmc->interface; + im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->sfmode = pmc->sfmode; + if (pmc->sfmode == MCAST_INCLUDE) { + im->tomb = pmc->tomb; + im->sources = pmc->sources; + for (psf = im->sources; psf; psf = psf->sf_next) + psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); - kfree(pmc); } + spin_unlock_bh(&im->lock); } +/* + * flush ip_mc_list deleted records + */ static void igmpv3_clear_delrec(struct in_device *in_dev) { struct ip_mc_list *pmc, *nextpmc; @@ -1366,7 +1381,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ip_mc_hash_add(in_dev, im); #ifdef CONFIG_IP_MULTICAST - igmpv3_del_delrec(in_dev, im->multiaddr); + igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); if (!in_dev->dead) @@ -1626,8 +1641,12 @@ void ip_mc_remap(struct in_device *in_dev) ASSERT_RTNL(); - for_each_pmc_rtnl(in_dev, pmc) + for_each_pmc_rtnl(in_dev, pmc) { +#ifdef CONFIG_IP_MULTICAST + igmpv3_del_delrec(in_dev, pmc); +#endif igmp_group_added(pmc); + } } /* Device going down */ @@ -1648,7 +1667,6 @@ void ip_mc_down(struct in_device *in_dev) in_dev->mr_gq_running = 0; if (del_timer(&in_dev->mr_gq_timer)) __in_dev_put(in_dev); - igmpv3_clear_delrec(in_dev); #endif ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS); @@ -1688,8 +1706,12 @@ void ip_mc_up(struct in_device *in_dev) #endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for_each_pmc_rtnl(in_dev, pmc) + for_each_pmc_rtnl(in_dev, pmc) { +#ifdef CONFIG_IP_MULTICAST + igmpv3_del_delrec(in_dev, pmc); +#endif igmp_group_added(pmc); + } } /* @@ -1704,13 +1726,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); +#ifdef CONFIG_IP_MULTICAST + igmpv3_clear_delrec(in_dev); +#endif while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - - /* We've dropped the groups in ip_mc_down already */ - ip_mc_clear_src(i); ip_ma_put(i); } } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8b4ffd216839..9f0a7b96646f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 03e7f7310423..877bdb02e887 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -107,6 +107,8 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(!skb)) return 0; + skb->protocol = htons(ETH_P_IP); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); @@ -239,19 +241,23 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *segs; int ret = 0; - /* common case: fragmentation of segments is not allowed, - * or seglen is <= mtu + /* common case: seglen is <= mtu */ - if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || - skb_gso_validate_mtu(skb, mtu)) + if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); - /* Slowpath - GSO segment length is exceeding the dst MTU. + /* Slowpath - GSO segment length exceeds the egress MTU. * - * This can happen in two cases: - * 1) TCP GRO packet, DF bit not set - * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly - * from host network stack. + * This can happen in several cases: + * - Forwarding of a TCP GRO skb, when DF flag is not set. + * - Forwarding of an skb that arrived on a virtualization interface + * (virtio-net/vhost/tap) with TSO/GSO size set by other network + * stack. + * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an + * interface with a smaller MTU. + * - Arriving GRO skb (or GSO skb in a virtualized environment) that is + * bridged to a NETIF_F_TSO tunnel stacked over an interface with an + * insufficent MTU. */ features = netif_skb_features(skb); BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); @@ -1579,7 +1585,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - oif = oif ? : skb->skb_iif; + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 777bc1883870..fed3d29f9eb3 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; - int skb_iif = skb->skb_iif; struct iphdr *iph; int err; @@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - if (skb_iif && !(df & htons(IP_DF))) { - /* Arrived from an ingress interface, got encapsulated, with - * fragmentation of encapulating frames allowed. - * If skb is gso, the resulting encapsulated network segments - * may exceed dst mtu. - * Allow IP Fragmentation of segments. - */ - IPCB(skb)->flags |= IPSKB_FRAG_SEGS; - } - /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f006e13de56..27089f5ebbb1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->dev->stats.tx_bytes += skb->len; } - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index c3776ff6749f..b3cc1335adbc 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -24,10 +24,11 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; if (addr_type == RTN_UNSPEC) - addr_type = inet_addr_type(net, saddr); + addr_type = inet_addr_type_dev_table(net, dev, saddr); if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) flags |= FLOWI_FLAG_ANYSRC; else @@ -40,6 +41,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + if (!fl4.flowi4_oif) + fl4.flowi4_oif = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b31df597fd37..697538464e6e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1201,8 +1201,8 @@ static int translate_compat_table(struct xt_table_info **pinfo, newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index bf855e64fc45..0c01a270bf9f 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, struct in_addr gw = { .s_addr = (__force __be32)regs->data[priv->sreg_addr], }; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); } @@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv4 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 62d4d90c1389..2a57566e6e91 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + if (!n) + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3251fe71f39f..814af89c1bd3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1164,7 +1164,7 @@ restart: err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto out_err; + goto do_error; sg = !!(sk->sk_route_caps & NETIF_F_SG); @@ -1241,7 +1241,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == sysctl_max_skb_frags || !sg) { + if (i >= sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1294af4e0127..f9038d6b109e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -200,8 +200,10 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; - if (sk->sk_state != TCP_CLOSE) + if (sk->sk_state != TCP_CLOSE) { + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); tcp_init_congestion_control(sk); + } } /* Manage refcounts on socket close. */ diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 10d728b6804c..ab37c6775630 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -56,6 +56,7 @@ struct dctcp { u32 next_seq; u32 ce_state; u32 delayed_ack_reserved; + u32 loss_cwnd; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->delayed_ack_reserved = 0; + ca->loss_cwnd = 0; ca->ce_state = 0; dctcp_reset(tp, ca); @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) static u32 dctcp_ssthresh(struct sock *sk) { - const struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); } @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, return 0; } +static u32 dctcp_cwnd_undo(struct sock *sk) +{ + const struct dctcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); +} + static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = dctcp_cwnd_undo, .set_state = dctcp_state, .get_info = dctcp_get_info, .flags = TCP_CONG_NEEDS_ECN, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 61b7be303eec..2259114c7242 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1564,6 +1564,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_add_backlog); +int tcp_filter(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = (struct tcphdr *)skb->data; + unsigned int eaten = skb->len; + int err; + + err = sk_filter_trim_cap(sk, skb, th->doff * 4); + if (!err) { + eaten -= skb->len; + TCP_SKB_CB(skb)->end_seq -= eaten; + } + return err; +} +EXPORT_SYMBOL(tcp_filter); + /* * From tcp_input.c */ @@ -1676,8 +1691,10 @@ process: nf_reset(skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); skb->dev = NULL; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d123d68f4d1d..5bab6c3f7a2f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1455,7 +1455,7 @@ static void udp_v4_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1652,10 +1652,10 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, if (use_hash2) { hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & - udp_table.mask; - hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask; + udptable->mask; + hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udp_table.hash2[hash2]; + hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 7e0fe4bdd967..feb50a16398d 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index af817158d830..ff450c2aad9b 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -50,7 +50,7 @@ struct proto udplite_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 060dd9922018..4bc5ba3ae452 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -183,7 +183,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); -static void addrconf_dad_completed(struct inet6_ifaddr *ifp); +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -2898,6 +2898,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); + rt_genid_bump_ipv6(dev_net(idev->dev)); ipv6_ifa_notify(RTM_NEWADDR, ifp); in6_ifa_put(ifp); } @@ -3740,7 +3741,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; - bool notify = false; + bool bump_id, notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3755,11 +3756,12 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { + bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp); + addrconf_dad_completed(ifp, bump_id); return; } @@ -3819,8 +3821,8 @@ static void addrconf_dad_work(struct work_struct *w) struct inet6_ifaddr, dad_work); struct inet6_dev *idev = ifp->idev; + bool bump_id, disable_ipv6 = false; struct in6_addr mcaddr; - bool disable_ipv6 = false; enum { DAD_PROCESS, @@ -3890,11 +3892,12 @@ static void addrconf_dad_work(struct work_struct *w) * DAD was successful */ + bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp); + addrconf_dad_completed(ifp, bump_id); goto out; } @@ -3931,7 +3934,7 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) return true; } -static void addrconf_dad_completed(struct inet6_ifaddr *ifp) +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; @@ -3983,6 +3986,9 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } + + if (bump_id) + rt_genid_bump_ipv6(dev_net(dev)); } static void addrconf_dad_run(struct inet6_dev *idev) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2f30ed..ccf40550c475 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -139,7 +139,8 @@ void ip6_datagram_release_cb(struct sock *sk) } EXPORT_SYMBOL_GPL(ip6_datagram_release_cb); -static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -252,6 +253,7 @@ ipv4_connected: out: return err; } +EXPORT_SYMBOL_GPL(__ip6_datagram_connect); int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 060a60b2f8a6..111ba55fd512 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -418,7 +418,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bd59c343d35f..2772004ba5a1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -447,8 +447,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; - else - iif = l3mdev_master_ifindex(skb->dev); + else { + dst = skb_dst(skb); + iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev); + } /* * Must not send error if the source does not uniquely diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 1fcf61f1cbc3..89c59e656f44 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,7 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, segs = ops->callbacks.gso_segment(skb, features); } - if (IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out; gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6001e781164e..59eb4ed99ce8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1366,7 +1366,7 @@ emsgsize: if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 87784560dc46..d76674efe523 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1034,6 +1034,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + bool use_cache = false; u8 hop_limit; int err = -1; @@ -1066,7 +1067,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); - } else if (!fl6->flowi6_mark) + } else if (!(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only only if the routing decision does + * not depend on the current inner header value + */ + use_cache = true; + } + + if (use_cache) dst = dst_cache_get(&t->dst_cache); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) @@ -1150,7 +1159,7 @@ route_lookup: if (t->encap.type != TUNNEL_ENCAP_NONE) goto tx_err_dst_release; } else { - if (!fl6->flowi6_mark && ndst) + if (use_cache && ndst) dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); } skb_dst_set(skb, dst); @@ -1172,7 +1181,6 @@ route_lookup: if (err) return err; - skb->protocol = htons(ETH_P_IPV6); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index a7520528ecd2..b283f293ee4a 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -88,9 +88,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, uh->len = htons(skb->len); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED - | IPSKB_REROUTED); skb_dst_set(skb, dst); udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 8a02ca8a11af..c299c1e2bbf0 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1138,6 +1138,33 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; +static bool is_vti6_tunnel(const struct net_device *dev) +{ + return dev->netdev_ops == &vti6_netdev_ops; +} + +static int vti6_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ip6_tnl *t = netdev_priv(dev); + + if (!is_vti6_tunnel(dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_DOWN: + if (!net_eq(t->net, dev_net(dev))) + xfrm_garbage_collect(t->net); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block vti6_notifier_block __read_mostly = { + .notifier_call = vti6_device_event, +}; + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1148,6 +1175,8 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; + register_netdevice_notifier(&vti6_notifier_block); + msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1180,6 +1209,7 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: + unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1194,6 +1224,7 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); + unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e4347aeb2e65..9948b5ce52da 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -576,11 +576,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { pr_debug("payload len = 0\n"); - return -EINVAL; + return 0; } if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) - return -EINVAL; + return 0; if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return -ENOMEM; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index f7aab5ab93a5..f06b0471f39f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -69,7 +69,7 @@ static unsigned int ipv6_defrag(void *priv, if (err == -EINPROGRESS) return NF_STOLEN; - return NF_ACCEPT; + return err == 0 ? NF_ACCEPT : NF_DROP; } static struct nf_hook_ops ipv6_defrag_ops[] = { diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index a5400223fd74..10090400c72f 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -156,6 +156,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.daddr = oip6h->saddr; fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; + fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 8bfd470cbe72..831f86e1ec08 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); } @@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 7cca8ac66fe9..cd4252346a32 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -155,6 +155,8 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(!skb)) return 0; + skb->protocol = htons(ETH_P_IPV6); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 947ed1ded026..1b57e11e6e0d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1364,6 +1364,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6->rt6i_flags & RTF_LOCAL) return; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + dst_confirm(dst); mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) @@ -2758,6 +2761,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && + dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { if (rt->rt6i_flags & RTF_CACHE) { /* For RTF_CACHE with rt6i_pmtu == 0 diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a27ab4eab39..b9f1fee9a886 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -818,8 +818,12 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else - fl6.flowi6_oif = oif ? : skb->skb_iif; + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + + fl6.flowi6_oif = oif; + } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; @@ -1225,7 +1229,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard; /* @@ -1453,8 +1457,10 @@ process: if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); skb->dev = NULL; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b2ef061e6836..e4a8000d59ad 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -514,7 +514,7 @@ out: return; } -static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -706,10 +706,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & - udp_table.mask; - hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask; + udptable->mask; + hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udp_table.hash2[hash2]; + hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index f6eb1ab34f4b..e78bdc76dcc3 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); -int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 47d0d2b87106..2f5101a12283 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -45,7 +45,7 @@ struct proto udplitev6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, + .backlog_rcv = __udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index fce25afb652a..8938b6ba57a0 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -61,7 +61,8 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && - !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + (!sk->sk_bound_dev_if || !dif || + sk->sk_bound_dev_if == dif)) goto found; } @@ -182,15 +183,17 @@ pass_up: struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb), + tunnel_id); + if (!sk) { + read_unlock_bh(&l2tp_ip_lock); + goto discard; + } + + sock_hold(sk); read_unlock_bh(&l2tp_ip_lock); } - if (sk == NULL) - goto discard; - - sock_hold(sk); - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; @@ -251,22 +254,17 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret; int chk_addr_ret; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr_len < sizeof(struct sockaddr_l2tpip)) return -EINVAL; if (addr->l2tp_family != AF_INET) return -EINVAL; - ret = -EADDRINUSE; - read_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, - sk->sk_bound_dev_if, addr->l2tp_conn_id)) - goto out_in_use; + lock_sock(sk); - read_unlock_bh(&l2tp_ip_lock); + ret = -EINVAL; + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; - lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; @@ -280,14 +278,22 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ - sk_dst_reset(sk); + write_lock_bh(&l2tp_ip_lock); + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + sk->sk_bound_dev_if, addr->l2tp_conn_id)) { + write_unlock_bh(&l2tp_ip_lock); + ret = -EADDRINUSE; + goto out; + } + + sk_dst_reset(sk); l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id; - write_lock_bh(&l2tp_ip_lock); sk_add_bind_node(sk, &l2tp_ip_bind_table); sk_del_node_init(sk); write_unlock_bh(&l2tp_ip_lock); + ret = 0; sock_reset_flag(sk, SOCK_ZAPPED); @@ -295,11 +301,6 @@ out: release_sock(sk); return ret; - -out_in_use: - read_unlock_bh(&l2tp_ip_lock); - - return ret; } static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -307,21 +308,24 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; int rc; - if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */ - return -EINVAL; - if (addr_len < sizeof(*lsa)) return -EINVAL; if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) return -EINVAL; - rc = ip4_datagram_connect(sk, uaddr, addr_len); - if (rc < 0) - return rc; - lock_sock(sk); + /* Must bind first - autobinding does not work */ + if (sock_flag(sk, SOCK_ZAPPED)) { + rc = -EINVAL; + goto out_sk; + } + + rc = __ip4_datagram_connect(sk, uaddr, addr_len); + if (rc < 0) + goto out_sk; + l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; write_lock_bh(&l2tp_ip_lock); @@ -329,7 +333,9 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len sk_add_bind_node(sk, &l2tp_ip_bind_table); write_unlock_bh(&l2tp_ip_lock); +out_sk: release_sock(sk); + return rc; } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ad3468c32b53..aa821cb639e5 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -72,8 +72,9 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && - !(addr && ipv6_addr_equal(addr, laddr)) && - !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + (!addr || ipv6_addr_equal(addr, laddr)) && + (!sk->sk_bound_dev_if || !dif || + sk->sk_bound_dev_if == dif)) goto found; } @@ -196,16 +197,17 @@ pass_up: struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, - 0, tunnel_id); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb), + tunnel_id); + if (!sk) { + read_unlock_bh(&l2tp_ip6_lock); + goto discard; + } + + sock_hold(sk); read_unlock_bh(&l2tp_ip6_lock); } - if (sk == NULL) - goto discard; - - sock_hold(sk); - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; @@ -266,11 +268,10 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *) uaddr; struct net *net = sock_net(sk); __be32 v4addr = 0; + int bound_dev_if; int addr_type; int err; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr->l2tp_family != AF_INET6) return -EINVAL; if (addr_len < sizeof(*addr)) @@ -286,41 +287,34 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_type & IPV6_ADDR_MULTICAST) return -EADDRNOTAVAIL; - err = -EADDRINUSE; - read_lock_bh(&l2tp_ip6_lock); - if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, - sk->sk_bound_dev_if, addr->l2tp_conn_id)) - goto out_in_use; - read_unlock_bh(&l2tp_ip6_lock); - lock_sock(sk); err = -EINVAL; + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_unlock; + if (sk->sk_state != TCP_CLOSE) goto out_unlock; + bound_dev_if = sk->sk_bound_dev_if; + /* Check if the address belongs to the host. */ rcu_read_lock(); if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (addr_len >= sizeof(struct sockaddr_in6) && - addr->l2tp_scope_id) { - /* Override any existing binding, if another - * one is supplied by user. - */ - sk->sk_bound_dev_if = addr->l2tp_scope_id; - } + if (addr->l2tp_scope_id) + bound_dev_if = addr->l2tp_scope_id; /* Binding to link-local address requires an - interface */ - if (!sk->sk_bound_dev_if) + * interface. + */ + if (!bound_dev_if) goto out_unlock_rcu; err = -ENODEV; - dev = dev_get_by_index_rcu(sock_net(sk), - sk->sk_bound_dev_if); + dev = dev_get_by_index_rcu(sock_net(sk), bound_dev_if); if (!dev) goto out_unlock_rcu; } @@ -335,13 +329,22 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } rcu_read_unlock(); - inet->inet_rcv_saddr = inet->inet_saddr = v4addr; + write_lock_bh(&l2tp_ip6_lock); + if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if, + addr->l2tp_conn_id)) { + write_unlock_bh(&l2tp_ip6_lock); + err = -EADDRINUSE; + goto out_unlock; + } + + inet->inet_saddr = v4addr; + inet->inet_rcv_saddr = v4addr; + sk->sk_bound_dev_if = bound_dev_if; sk->sk_v6_rcv_saddr = addr->l2tp_addr; np->saddr = addr->l2tp_addr; l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id; - write_lock_bh(&l2tp_ip6_lock); sk_add_bind_node(sk, &l2tp_ip6_bind_table); sk_del_node_init(sk); write_unlock_bh(&l2tp_ip6_lock); @@ -354,10 +357,7 @@ out_unlock_rcu: rcu_read_unlock(); out_unlock: release_sock(sk); - return err; -out_in_use: - read_unlock_bh(&l2tp_ip6_lock); return err; } @@ -370,9 +370,6 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_type; int rc; - if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */ - return -EINVAL; - if (addr_len < sizeof(*lsa)) return -EINVAL; @@ -389,10 +386,18 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; } - rc = ip6_datagram_connect(sk, uaddr, addr_len); - lock_sock(sk); + /* Must bind first - autobinding does not work */ + if (sock_flag(sk, SOCK_ZAPPED)) { + rc = -EINVAL; + goto out_sk; + } + + rc = __ip6_datagram_connect(sk, uaddr, addr_len); + if (rc < 0) + goto out_sk; + l2tp_ip6_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; write_lock_bh(&l2tp_ip6_lock); @@ -400,6 +405,7 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, sk_add_bind_node(sk, &l2tp_ip6_bind_table); write_unlock_bh(&l2tp_ip6_lock); +out_sk: release_sock(sk); return rc; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 78e9ecbc96e6..8e05032689f0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (!local->ops->set_tim) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) return; if (sta->dead) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1c56abc49627..bd5f4be89435 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1501,7 +1501,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct fq *fq = &local->fq; struct ieee80211_vif *vif; struct txq_info *txqi; @@ -1526,8 +1525,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, if (!txqi) return false; - info->control.vif = vif; - spin_lock_bh(&fq->lock); ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock); @@ -3213,7 +3210,6 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - *ieee80211_get_qos_ctl(hdr) = tid; hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; @@ -3338,6 +3334,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; + } + __skb_queue_head_init(&tx.skbs); tx.flags = IEEE80211_TX_UNICAST; @@ -3426,6 +3427,11 @@ begin: goto begin; } + if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) + info->flags |= IEEE80211_TX_CTL_AMPDU; + else + info->flags &= ~IEEE80211_TX_CTL_AMPDU; + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { struct sta_info *sta = container_of(txq->sta, struct sta_info, sta); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index ee715764a828..6832bf6ab69f 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -270,6 +270,22 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); } + /* + * This is a workaround for VHT-enabled STAs which break the spec + * and have the VHT-MCS Rx map filled in with value 3 for all eight + * spacial streams, an example is AR9462. + * + * As per spec, in section 22.1.1 Introduction to the VHT PHY + * A VHT STA shall support at least single spactial stream VHT-MCSs + * 0 to 7 (transmit and receive) in all supported channel widths. + */ + if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) { + vht_cap->vht_supported = false; + sdata_info(sdata, "Ignoring VHT IE from %pM due to invalid rx_mcs_map\n", + sta->addr); + return; + } + /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c3c809b2e712..a6e44ef2ec9a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = { .hdrsize = 0, .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_MAX, + .maxattr = IPVS_CMD_ATTR_MAX, .netnsok = true, /* Make ipvsadm to work on netns */ }; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 1b07578bedf3..9350530c16c1 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -283,6 +283,7 @@ struct ip_vs_sync_buff { */ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) { + memset(ho, 0, sizeof(*ho)); ho->init_seq = get_unaligned_be32(&no->init_seq); ho->delta = get_unaligned_be32(&no->delta); ho->previous_delta = get_unaligned_be32(&no->previous_delta); @@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa kfree(param->pe_data); } - if (opt) - memcpy(&cp->in_seq, opt, sizeof(*opt)); + if (opt) { + cp->in_seq = opt->in_seq; + cp->out_seq = opt->out_seq; + } atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index df2f5a3901df..0f87e5d21be7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -76,6 +76,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 last_bucket; bool exiting; + long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; @@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ #define GC_MAX_BUCKETS_DIV 64u -#define GC_MAX_BUCKETS 8192u -#define GC_INTERVAL (5 * HZ) +/* upper bound of scan intervals */ +#define GC_INTERVAL_MAX (2 * HZ) +/* maximum conntracks to evict per gc run */ #define GC_MAX_EVICTS 256u static struct conntrack_gc_work conntrack_gc_work; @@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash) static void gc_worker(struct work_struct *work) { unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned long next_run = GC_INTERVAL; - unsigned int ratio, scanned = 0; struct conntrack_gc_work *gc_work; + unsigned int ratio, scanned = 0; + unsigned long next_run; gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); + goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; i = gc_work->last_bucket; do { @@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work) if (gc_work->exiting) return; + /* + * Eviction will normally happen from the packet path, and not + * from this gc worker. + * + * This worker is only here to reap expired entries when system went + * idle after a busy period. + * + * The heuristics below are supposed to balance conflicting goals: + * + * 1. Minimize time until we notice a stale entry + * 2. Maximize scan intervals to not waste cycles + * + * Normally, expired_count will be 0, this increases the next_run time + * to priorize 2) above. + * + * As soon as a timed-out entry is found, move towards 1) and increase + * the scan frequency. + * In case we have lots of evictions next scan is done immediately. + */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90 || expired_count == GC_MAX_EVICTS) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) { + gc_work->next_gc_run = 0; next_run = 0; + } else if (expired_count) { + gc_work->next_gc_run /= 2U; + next_run = msecs_to_jiffies(1); + } else { + if (gc_work->next_gc_run < GC_INTERVAL_MAX) + gc_work->next_gc_run += msecs_to_jiffies(1); + + next_run = gc_work->next_gc_run; + } gc_work->last_bucket = i; - schedule_delayed_work(&gc_work->dwork, next_run); + queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + gc_work->next_gc_run = GC_INTERVAL_MAX; gc_work->exiting = false; } @@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); + queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX); return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 336e21559e01..7341adf7059d 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name) && - h->tuple.src.l3num == l3num && - h->tuple.dst.protonum == protonum) + if (strcmp(h->name, name)) + continue; + + if (h->tuple.src.l3num != NFPROTO_UNSPEC && + h->tuple.src.l3num != l3num) + continue; + + if (h->tuple.dst.protonum == protonum) return h; } } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 621b81c7bddc..c3fc14e021ec 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, handler = &sip_handlers[i]; if (handler->request == NULL) continue; - if (*datalen < handler->len || + if (*datalen < handler->len + 2 || strncasecmp(*dptr, handler->method, handler->len)) continue; + if ((*dptr)[handler->len] != ' ' || + !isalpha((*dptr)[handler->len+1])) + continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, &matchoff, &matchlen) <= 0) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index bbb8f3df79f7..5b9c884a452e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -42,7 +42,7 @@ struct nf_nat_conn_key { const struct nf_conntrack_zone *zone; }; -static struct rhashtable nf_nat_bysource_table; +static struct rhltable nf_nat_bysource_table; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -193,9 +193,12 @@ static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg, const struct nf_nat_conn_key *key = arg->key; const struct nf_conn *ct = obj; - return same_src(ct, key->tuple) && - net_eq(nf_ct_net(ct), key->net) && - nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL); + if (!same_src(ct, key->tuple) || + !net_eq(nf_ct_net(ct), key->net) || + !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL)) + return 1; + + return 0; } static struct rhashtable_params nf_nat_bysource_params = { @@ -204,7 +207,6 @@ static struct rhashtable_params nf_nat_bysource_params = { .obj_cmpfn = nf_nat_bysource_cmp, .nelem_hint = 256, .min_size = 1024, - .nulls_base = (1U << RHT_BASE_SHIFT), }; /* Only called for SRC manip */ @@ -223,12 +225,15 @@ find_appropriate_src(struct net *net, .tuple = tuple, .zone = zone }; + struct rhlist_head *hl; - ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, - nf_nat_bysource_params); - if (!ct) + hl = rhltable_lookup(&nf_nat_bysource_table, &key, + nf_nat_bysource_params); + if (!hl) return 0; + ct = container_of(hl, typeof(*ct), nat_bysource); + nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; @@ -446,11 +451,17 @@ nf_nat_setup_info(struct nf_conn *ct, } if (maniptype == NF_NAT_MANIP_SRC) { + struct nf_nat_conn_key key = { + .net = nf_ct_net(ct), + .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + .zone = nf_ct_zone(ct), + }; int err; - err = rhashtable_insert_fast(&nf_nat_bysource_table, - &ct->nat_bysource, - nf_nat_bysource_params); + err = rhltable_insert_key(&nf_nat_bysource_table, + &key, + &ct->nat_bysource, + nf_nat_bysource_params); if (err) return NF_DROP; } @@ -567,8 +578,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * will delete entry from already-freed table. */ ct->status &= ~IPS_NAT_DONE_MASK; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. @@ -698,8 +709,8 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) if (!nat) return; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -834,13 +845,13 @@ static int __init nf_nat_init(void) { int ret; - ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); + ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); if (ret) return ret; ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { - rhashtable_destroy(&nf_nat_bysource_table); + rhltable_destroy(&nf_nat_bysource_table); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } @@ -864,7 +875,7 @@ static int __init nf_nat_init(void) return 0; cleanup_extend: - rhashtable_destroy(&nf_nat_bysource_table); + rhltable_destroy(&nf_nat_bysource_table); nf_ct_extend_unregister(&nat_extend); return ret; } @@ -883,7 +894,7 @@ static void __exit nf_nat_cleanup(void) for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); - rhashtable_destroy(&nf_nat_bysource_table); + rhltable_destroy(&nf_nat_bysource_table); } MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24db22257586..e5194f6f906c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2570,7 +2570,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, } if (set->timeout && - nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout), + nla_put_be64(skb, NFTA_SET_TIMEOUT, + cpu_to_be64(jiffies_to_msecs(set->timeout)), NFTA_SET_PAD)) goto nla_put_failure; if (set->gc_int && @@ -2859,7 +2860,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + nla[NFTA_SET_TIMEOUT]))); } gc_int = 0; if (nla[NFTA_SET_GC_INTERVAL] != NULL) { @@ -2956,12 +2958,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; +err3: + ops->destroy(set); err2: kfree(set); err1: @@ -3176,7 +3180,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - cpu_to_be64(*nft_set_ext_timeout(ext)), + cpu_to_be64(jiffies_to_msecs( + *nft_set_ext_timeout(ext))), NFTA_SET_ELEM_PAD)) goto nla_put_failure; @@ -3445,21 +3450,22 @@ void *nft_set_elem_init(const struct nft_set *set, memcpy(nft_set_ext_data(ext), data, set->dlen); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) *nft_set_ext_expiration(ext) = - jiffies + msecs_to_jiffies(timeout); + jiffies + timeout; if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) *nft_set_ext_timeout(ext) = timeout; return elem; } -void nft_set_elem_destroy(const struct nft_set *set, void *elem) +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_uninit(nft_set_ext_data(ext), set->dtype); - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); kfree(elem); @@ -3532,7 +3538,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + nla[NFTA_SET_ELEM_TIMEOUT]))); } else if (set->flags & NFT_SET_TIMEOUT) { timeout = set->timeout; } @@ -3565,6 +3572,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { + .net = ctx->net, .afi = ctx->afi, .table = ctx->table, .chain = (struct nft_chain *)binding->chain, @@ -3812,7 +3820,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu) gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); + nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); kfree(gcb); } EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); @@ -4030,7 +4038,7 @@ static void nf_tables_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); @@ -4171,7 +4179,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); @@ -4421,7 +4429,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, * Otherwise a 0 is returned and the attribute value is stored in the * destination variable. */ -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { u32 val; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 517f08767a3c..31ca94793aa9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, ®s->data[priv->sreg_key], ®s->data[priv->sreg_data], timeout, GFP_ATOMIC); - if (elem == NULL) { - if (set->size) - atomic_dec(&set->nelems); - return NULL; - } + if (elem == NULL) + goto err1; ext = nft_set_elem_ext(set, elem); if (priv->expr != NULL && nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0) - return NULL; + goto err2; return elem; + +err2: + nft_set_elem_destroy(set, elem, false); +err1: + if (set->size) + atomic_dec(&set->nelems); + return NULL; } static void nft_dynset_eval(const struct nft_expr *expr, @@ -139,6 +143,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return PTR_ERR(set); } + if (set->ops->update == NULL) + return -EOPNOTSUPP; + if (set->flags & NFT_SET_CONSTANT) return -EBUSY; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index baf694de3935..d5447a22275c 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -53,6 +53,7 @@ static int nft_hash_init(const struct nft_ctx *ctx, { struct nft_hash *priv = nft_expr_priv(expr); u32 len; + int err; if (!tb[NFTA_HASH_SREG] || !tb[NFTA_HASH_DREG] || @@ -67,8 +68,10 @@ static int nft_hash_init(const struct nft_ctx *ctx, priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN])); - if (len == 0 || len > U8_MAX) + err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len); + if (err < 0) + return err; + if (len == 0) return -ERANGE; priv->len = len; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index fbc88009ca2e..8f0aaaea1376 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -59,6 +59,12 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr int err; u32 op; + if (!tb[NFTA_RANGE_SREG] || + !tb[NFTA_RANGE_OP] || + !tb[NFTA_RANGE_FROM_DATA] || + !tb[NFTA_RANGE_TO_DATA]) + return -EINVAL; + err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), &desc_from, tb[NFTA_RANGE_FROM_DATA]); if (err < 0) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3794cb2fc788..a3dface3e6e6 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_hash_elem *he, *prev; struct nft_hash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, @@ -112,15 +112,24 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, he = new(set, expr, regs); if (he == NULL) goto err1; - if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params)) + + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) goto err2; + + /* Another cpu may race to insert the element with the same key */ + if (prev) { + nft_set_elem_destroy(set, he, true); + he = prev; + } + out: *ext = &he->ext; return true; err2: - nft_set_elem_destroy(set, he); + nft_set_elem_destroy(set, he, true); err1: return false; } @@ -332,7 +341,7 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr, true); } static void nft_hash_destroy(const struct nft_set *set) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 38b5bda242f8..36493a7cae88 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe, true); } } diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 69f78e96fdb4..b83e158e116a 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) u_int32_t newmark; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return XT_CONTINUE; switch (info->mode) { @@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return false; return ((ct->mark & info->mask) == info->mark) ^ info->invert; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 62bea4591054..602e5ebe9db3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -322,14 +322,11 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -static void netlink_sock_destruct(struct sock *sk) +static void __netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); if (nlk->cb_running) { - if (nlk->cb.done) - nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } @@ -346,6 +343,28 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } +static void netlink_sock_destruct_work(struct work_struct *work) +{ + struct netlink_sock *nlk = container_of(work, struct netlink_sock, + work); + + nlk->cb.done(&nlk->cb); + __netlink_sock_destruct(&nlk->sk); +} + +static void netlink_sock_destruct(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->cb_running && nlk->cb.done) { + INIT_WORK(&nlk->work, netlink_sock_destruct_work); + schedule_work(&nlk->work); + return; + } + + __netlink_sock_destruct(sk); +} + /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 3cfd6cc60504..4fdb38318977 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -3,6 +3,7 @@ #include <linux/rhashtable.h> #include <linux/atomic.h> +#include <linux/workqueue.h> #include <net/sock.h> #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -33,6 +34,7 @@ struct netlink_sock { struct rhash_head node; struct rcu_head rcu; + struct work_struct work; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index b2f0e986a6f4..a5546249fb10 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -178,11 +178,8 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } cb->args[1] = i; } else { - if (req->sdiag_protocol >= MAX_LINKS) { - read_unlock(&nl_table_lock); - rcu_read_unlock(); + if (req->sdiag_protocol >= MAX_LINKS) return -ENOENT; - } err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 23cc12639ba7..49c28e8ef01b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -404,7 +404,7 @@ int __genl_register_family(struct genl_family *family) err = genl_validate_assign_mc_groups(family); if (err) - goto errout_locked; + goto errout_free; list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); @@ -417,6 +417,8 @@ int __genl_register_family(struct genl_family *family) return 0; +errout_free: + kfree(family->attrbuf); errout_locked: genl_unlock_all(); errout: diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 31045ef44a82..fecefa2dc94e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -370,8 +370,11 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, skb_orphan(skb); memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); - if (err) + if (err) { + if (err != -EINPROGRESS) + kfree_skb(skb); return err; + } key->ip.proto = ipv6_hdr(skb)->nexthdr; ovs_cb.mru = IP6CB(skb)->frag_max_size; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d2238b204691..dd2332390c45 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3648,19 +3648,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: - po->tp_version = val; - return 0; + break; default: return -EINVAL; } + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_version = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_RESERVE: { @@ -4164,6 +4170,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; + lock_sock(sk); /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { net_warn_ratelimited("Tx-ring is not supported.\n"); @@ -4245,7 +4252,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; } - lock_sock(sk); /* Detach socket from network */ spin_lock(&po->bind_lock); @@ -4294,11 +4300,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); } - release_sock(sk); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: + release_sock(sk); return err; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fcddacc92e01..20e2923dc827 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -659,6 +659,8 @@ out_recv: out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); out_slab: + if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) + pr_warn("could not unregister rds_tcp_dev_notifier\n"); kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b54d56d4959b..cf9b2fe8eac6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -108,6 +108,17 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind) kfree(keys); } +static bool offset_valid(struct sk_buff *skb, int offset) +{ + if (offset > 0 && offset > skb->len) + return false; + + if (offset < 0 && -offset > skb_headroom(skb)) + return false; + + return true; +} + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -134,6 +145,11 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, if (tkey->offmask) { char *d, _d; + if (!offset_valid(skb, off + tkey->at)) { + pr_info("tc filter pedit 'at' offset %d out of bounds\n", + off + tkey->at); + goto bad; + } d = skb_header_pointer(skb, off + tkey->at, 1, &_d); if (!d) @@ -146,10 +162,10 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, " offset must be on 32 bit boundaries\n"); goto bad; } - if (offset > 0 && offset > skb->len) { - pr_info("tc filter pedit" - " offset %d can't exceed pkt length %d\n", - offset, skb->len); + + if (!offset_valid(skb, off + offset)) { + pr_info("tc filter pedit offset %d out of bounds\n", + offset); goto bad; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2b2a7974e4bb..b05d4a2155b0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -430,7 +430,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, event) <= 0) { kfree_skb(skb); return -EINVAL; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index eb219b78cd49..5877f6061b57 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -62,9 +62,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; - if (head == NULL) - return 0UL; - list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { l = (unsigned long) f; @@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_proto *tp, bool force) tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, basic_delete_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index bb1d5a487081..0a47ba5e6109 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -292,7 +292,6 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) call_rcu(&prog->rcu, __cls_bpf_delete_prog); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } @@ -303,9 +302,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) struct cls_bpf_prog *prog; unsigned long ret = 0UL; - if (head == NULL) - return 0UL; - list_for_each_entry(prog, &head->plist, link) { if (prog->handle == handle) { ret = (unsigned long) prog; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 85233c470035..c1f20077837f 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -137,11 +137,10 @@ static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) if (!force) return false; - - if (head) { - RCU_INIT_POINTER(tp->root, NULL); + /* Head can still be NULL due to cls_cgroup_init(). */ + if (head) call_rcu(&head->rcu, cls_cgroup_destroy_rcu); - } + return true; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index e39672394c7b..6575aba87630 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -596,7 +596,6 @@ static bool flow_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6f40fba599b..904442421db3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/rhashtable.h> +#include <linux/workqueue.h> #include <linux/if_ether.h> #include <linux/in6.h> @@ -64,7 +65,10 @@ struct cls_fl_head { bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct cls_fl_filter { @@ -269,6 +273,24 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); } +static void fl_destroy_sleepable(struct work_struct *work) +{ + struct cls_fl_head *head = container_of(work, struct cls_fl_head, + work); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree(head); + module_put(THIS_MODULE); +} + +static void fl_destroy_rcu(struct rcu_head *rcu) +{ + struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu); + + INIT_WORK(&head->work, fl_destroy_sleepable); + schedule_work(&head->work); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -282,10 +304,9 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, fl_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); - if (head->mask_assigned) - rhashtable_destroy(&head->ht); - kfree_rcu(head, rcu); + + __module_get(THIS_MODULE); + call_rcu(&head->rcu, fl_destroy_rcu); return true; } @@ -711,8 +732,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; if (fold) { - rhashtable_remove_fast(&head->ht, &fold->ht_node, - head->ht_params); + if (!tc_skip_sw(fold->flags)) + rhashtable_remove_fast(&head->ht, &fold->ht_node, + head->ht_params); fl_hw_destroy_filter(tp, (unsigned long)fold); } @@ -739,8 +761,9 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg) struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = (struct cls_fl_filter *) arg; - rhashtable_remove_fast(&head->ht, &f->ht_node, - head->ht_params); + if (!tc_skip_sw(f->flags)) + rhashtable_remove_fast(&head->ht, &f->ht_node, + head->ht_params); list_del_rcu(&f->list); fl_hw_destroy_filter(tp, (unsigned long)f); tcf_unbind_filter(tp, &f->res); diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 25927b6c4436..f935429bd5ef 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -114,7 +114,6 @@ static bool mall_destroy(struct tcf_proto *tp, bool force) call_rcu(&f->rcu, mall_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 4f05a19fb073..322438fb3ffc 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; nhptr = ip_hdr(skb); #endif - + if (unlikely(!head)) + return -1; restart: #if RSVP_DST_LEN == 4 diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 96144bdf30db..0751245a6ace 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -543,7 +543,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - RCU_INIT_POINTER(tp->root, NULL); call_rcu(&p->rcu, __tcindex_destroy); return true; } diff --git a/net/sctp/input.c b/net/sctp/input.c index a2ea1d1cc06a..a01a56ec8b8c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb) * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { - if (asoc) { - sctp_association_put(asoc); + if (transport) { + sctp_transport_put(transport); asoc = NULL; + transport = NULL; } else { sctp_endpoint_put(ep); ep = NULL; @@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb) bh_unlock_sock(sk); /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -283,8 +284,8 @@ discard_it: discard_release: /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_inq *inqueue = &chunk->rcvr->inqueue; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = NULL; int backloged = 0; @@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) done: /* Release the refs we took in sctp_add_backlog */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_put(sctp_assoc(rcvr)); + sctp_transport_put(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_put(sctp_ep(rcvr)); else @@ -363,6 +365,7 @@ done: static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = chunk->rcvr; int ret; @@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) * from us */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); + sctp_transport_hold(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_hold(sctp_ep(rcvr)); else @@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, return sk; out: - sctp_association_put(asoc); + sctp_transport_put(transport); return NULL; } /* Common cleanup code for icmp/icmpv6 error handler. */ -void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) +void sctp_err_finish(struct sock *sk, struct sctp_transport *t) { bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(t); } /* @@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); } /* @@ -952,11 +955,8 @@ static struct sctp_association *__sctp_lookup_association( goto out; asoc = t->asoc; - sctp_association_hold(asoc); *pt = t; - sctp_transport_put(t); - out: return asoc; } @@ -986,7 +986,7 @@ int sctp_has_association(struct net *net, struct sctp_transport *transport; if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { - sctp_association_put(asoc); + sctp_transport_put(transport); return 1; } @@ -1021,7 +1021,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, struct sctphdr *sh = sctp_hdr(skb); union sctp_params params; sctp_init_chunk_t *init; - struct sctp_transport *transport; struct sctp_af *af; /* @@ -1052,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, af->from_addr_param(paddr, params.addr, sh->source, 0); - asoc = __sctp_lookup_association(net, laddr, paddr, &transport); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); if (asoc) return asoc; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f473779e8b1c..176af3080a2b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); out: if (likely(idev != NULL)) in6_dev_put(idev); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fbb6feb8c27..f23ad913dc7a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk, timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); - err = sctp_wait_for_connect(asoc, &timeo); - if ((err == 0 || err == -EINPROGRESS) && assoc_id) + if (assoc_id) *assoc_id = asoc->assoc_id; + err = sctp_wait_for_connect(asoc, &timeo); + /* Note: the asoc may be freed after the return of + * sctp_wait_for_connect. + */ /* Don't free association on exit. */ asoc = NULL; @@ -4282,19 +4285,18 @@ static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; - struct sctp_association *asoc; if (!sctp_style(sk, TCP)) return; - if (how & SEND_SHUTDOWN) { + ep = sctp_sk(sk)->ep; + if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { + struct sctp_association *asoc; + sk->sk_state = SCTP_SS_CLOSING; - ep = sctp_sk(sk)->ep; - if (!list_empty(&ep->asocs)) { - asoc = list_entry(ep->asocs.next, - struct sctp_association, asocs); - sctp_primitive_SHUTDOWN(net, asoc, NULL); - } + asoc = list_entry(ep->asocs.next, + struct sctp_association, asocs); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } } @@ -4480,12 +4482,9 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), if (!transport || !sctp_transport_hold(transport)) goto out; - sctp_association_hold(transport->asoc); - sctp_transport_put(transport); - rcu_read_unlock(); err = cb(transport, p); - sctp_association_put(transport->asoc); + sctp_transport_put(transport); out: return err; diff --git a/net/socket.c b/net/socket.c index 5a9bf5ee2464..73dc69f9681e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -341,8 +341,23 @@ static const struct xattr_handler sockfs_xattr_handler = { .get = sockfs_xattr_get, }; +static int sockfs_security_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *suffix, const void *value, + size_t size, int flags) +{ + /* Handled by LSM. */ + return -EAGAIN; +} + +static const struct xattr_handler sockfs_security_xattr_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .set = sockfs_security_xattr_set, +}; + static const struct xattr_handler *sockfs_xattr_handlers[] = { &sockfs_xattr_handler, + &sockfs_security_xattr_handler, NULL }; @@ -2038,6 +2053,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (err) break; ++datagrams; + if (msg_data_left(&msg_sys)) + break; cond_resched(); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 34dd7b26ee5f..62a482790937 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2753,14 +2753,18 @@ EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) { + rcu_read_lock(); xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { + rcu_read_lock(); rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch), xprt); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt); @@ -2770,9 +2774,8 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, struct rpc_xprt_switch *xps; bool ret; - xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); - rcu_read_lock(); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); ret = rpc_xprt_switch_has_addr(xps, sap); rcu_read_unlock(); return ret; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c3f652395a80..3bc1d61694cb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1002,14 +1002,8 @@ static void svc_age_temp_xprts(unsigned long closure) void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) { struct svc_xprt *xprt; - struct svc_sock *svsk; - struct socket *sock; struct list_head *le, *next; LIST_HEAD(to_be_closed); - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; spin_lock_bh(&serv->sv_lock); list_for_each_safe(le, next, &serv->sv_tempsocks) { @@ -1027,10 +1021,7 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - svsk = container_of(xprt, struct svc_sock, sk_xprt); - sock = svsk->sk_sock; - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_close_xprt(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 57625f64efd5..a4bc98265d88 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -438,6 +438,21 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); } +static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) +{ + struct svc_sock *svsk; + struct socket *sock; + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + svsk = container_of(xprt, struct svc_sock, sk_xprt); + sock = svsk->sk_sock; + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -648,6 +663,10 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) return NULL; } +static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + static struct svc_xprt *svc_udp_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -667,6 +686,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt, }; static struct svc_xprt_class svc_udp_class = { @@ -1242,6 +1262,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 210949562786..26b26beef2d4 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -44,18 +44,20 @@ * being done. * * When the underlying transport disconnects, MRs are left in one of - * three states: + * four states: * * INVALID: The MR was not in use before the QP entered ERROR state. - * (Or, the LOCAL_INV WR has not completed or flushed yet). - * - * STALE: The MR was being registered or unregistered when the QP - * entered ERROR state, and the pending WR was flushed. * * VALID: The MR was registered before the QP entered ERROR state. * - * When frwr_op_map encounters STALE and VALID MRs, they are recovered - * with ib_dereg_mr and then are re-initialized. Beause MR recovery + * FLUSHED_FR: The MR was being registered when the QP entered ERROR + * state, and the pending WR was flushed. + * + * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR + * state, and the pending WR was flushed. + * + * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered + * with ib_dereg_mr and then are re-initialized. Because MR recovery * allocates fresh resources, it is deferred to a workqueue, and the * recovered MRs are placed back on the rb_mws list when recovery is * complete. frwr_op_map allocates another MR for the current RPC while @@ -177,12 +179,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) static void frwr_op_recover_mr(struct rpcrdma_mw *mw) { + enum rpcrdma_frmr_state state = mw->frmr.fr_state; struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc; rc = __frwr_reset_mr(ia, mw); - ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); + if (state != FRMR_FLUSHED_LI) + ib_dma_unmap_sg(ia->ri_device, + mw->mw_sg, mw->mw_nents, mw->mw_dir); if (rc) goto out_release; @@ -262,10 +267,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) } static void -__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr, - const char *wr) +__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) { - frmr->fr_state = FRMR_IS_STALE; if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("rpcrdma: %s: %s (%u/0x%x)\n", wr, ib_wc_status_msg(wc->status), @@ -288,7 +291,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) { cqe = wc->wr_cqe; frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - __frwr_sendcompletion_flush(wc, frmr, "fastreg"); + frmr->fr_state = FRMR_FLUSHED_FR; + __frwr_sendcompletion_flush(wc, "fastreg"); } } @@ -308,7 +312,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) { cqe = wc->wr_cqe; frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - __frwr_sendcompletion_flush(wc, frmr, "localinv"); + frmr->fr_state = FRMR_FLUSHED_LI; + __frwr_sendcompletion_flush(wc, "localinv"); } } @@ -328,8 +333,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ cqe = wc->wr_cqe; frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - if (wc->status != IB_WC_SUCCESS) - __frwr_sendcompletion_flush(wc, frmr, "localinv"); + if (wc->status != IB_WC_SUCCESS) { + frmr->fr_state = FRMR_FLUSHED_LI; + __frwr_sendcompletion_flush(wc, "localinv"); + } complete(&frmr->fr_linv_done); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6864fb967038..1334de2715c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -67,6 +67,7 @@ static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); +static void svc_rdma_kill_temp_xprt(struct svc_xprt *); static struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, @@ -79,6 +80,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, .xpo_secure_port = svc_rdma_secure_port, + .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt, }; struct svc_xprt_class svc_rdma_class = { @@ -1317,6 +1319,10 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp) return 1; } +static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr, *n_wr; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0d35b761c883..6e1bba358203 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -216,7 +216,8 @@ struct rpcrdma_rep { enum rpcrdma_frmr_state { FRMR_IS_INVALID, /* ready to be used */ FRMR_IS_VALID, /* in use */ - FRMR_IS_STALE, /* failed completion */ + FRMR_FLUSHED_FR, /* flushed FASTREG WR */ + FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ }; struct rpcrdma_frmr { diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 975dbeb60ab0..52d74760fb68 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -421,6 +421,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, dev = dev_get_by_name(net, driver_name); if (!dev) return -ENODEV; + if (tipc_mtu_bad(dev, 0)) { + dev_put(dev); + return -EINVAL; + } /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); @@ -610,8 +614,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (!b) return NOTIFY_DONE; - b->mtu = dev->mtu; - switch (evt) { case NETDEV_CHANGE: if (netif_carrier_ok(dev)) @@ -624,6 +626,11 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, tipc_reset_bearer(net, b); break; case NETDEV_CHANGEMTU: + if (tipc_mtu_bad(dev, 0)) { + bearer_disable(net, b); + break; + } + b->mtu = dev->mtu; tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 78892e2f53e3..278ff7f616f9 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -39,6 +39,7 @@ #include "netlink.h" #include "core.h" +#include "msg.h" #include <net/genetlink.h> #define MAX_MEDIA 3 @@ -59,6 +60,9 @@ #define TIPC_MEDIA_TYPE_IB 2 #define TIPC_MEDIA_TYPE_UDP 3 +/* minimum bearer MTU */ +#define TIPC_MIN_BEARER_MTU (MAX_H_SIZE + INT_H_SIZE) + /** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) @@ -215,4 +219,13 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq); +/* check if device MTU is too low for tipc headers */ +static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) +{ + if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve) + return false; + netdev_warn(dev, "MTU too low for tipc bearer\n"); + return true; +} + #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/link.c b/net/tipc/link.c index 1055164c6232..bda89bf9f4ff 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -47,8 +47,8 @@ #include <linux/pkt_sched.h> struct tipc_stats { - u32 sent_info; /* used in counting # sent packets */ - u32 recv_info; /* used in counting # recv'd packets */ + u32 sent_pkts; + u32 recv_pkts; u32 sent_states; u32 recv_states; u32 sent_probes; @@ -857,7 +857,6 @@ void tipc_link_reset(struct tipc_link *l) l->acked = 0; l->silent_intv_cnt = 0; l->rst_cnt = 0; - l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; memset(&l->mon_state, 0, sizeof(l->mon_state)); @@ -888,6 +887,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *transmq = &l->transmq; struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff *skb, *_skb, *bskb; + int pkt_cnt = skb_queue_len(list); /* Match msg importance against this and all higher backlog limits: */ if (!skb_queue_empty(backlogq)) { @@ -901,6 +901,11 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -EMSGSIZE; } + if (pkt_cnt > 1) { + l->stats.sent_fragmented++; + l->stats.sent_fragments += pkt_cnt; + } + /* Prepare each packet for sending, and add to relevant queue: */ while (skb_queue_len(list)) { skb = skb_peek(list); @@ -920,6 +925,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; + l->stats.sent_pkts++; seqno++; continue; } @@ -968,6 +974,7 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) msg_set_ack(hdr, ack); msg_set_bcast_ack(hdr, bc_ack); l->rcv_unacked = 0; + l->stats.sent_pkts++; seqno++; } l->snd_nxt = seqno; @@ -1260,7 +1267,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Deliver packet */ l->rcv_nxt++; - l->stats.recv_info++; + l->stats.recv_pkts++; if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) @@ -1492,8 +1499,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) l->tolerance = peers_tol; - if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI, - TIPC_MAX_LINK_PRI)) { + /* Update own prio if peer indicates a different value */ + if ((peers_prio != l->priority) && + in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { l->priority = peers_prio; rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } @@ -1799,10 +1807,6 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) void tipc_link_reset_stats(struct tipc_link *l) { memset(&l->stats, 0, sizeof(l->stats)); - if (!link_is_bc_sndlink(l)) { - l->stats.sent_info = l->snd_nxt; - l->stats.recv_info = l->rcv_nxt; - } } static void link_print(struct tipc_link *l, const char *str) @@ -1866,12 +1870,12 @@ static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) }; struct nla_map map[] = { - {TIPC_NLA_STATS_RX_INFO, s->recv_info}, + {TIPC_NLA_STATS_RX_INFO, 0}, {TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments}, {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented}, {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles}, {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled}, - {TIPC_NLA_STATS_TX_INFO, s->sent_info}, + {TIPC_NLA_STATS_TX_INFO, 0}, {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments}, {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented}, {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles}, @@ -1946,9 +1950,9 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->rcv_nxt)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->stats.recv_pkts)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->snd_nxt)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->stats.sent_pkts)) goto attr_msg_full; if (tipc_link_is_up(link)) @@ -2003,12 +2007,12 @@ static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, }; struct nla_map map[] = { - {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, + {TIPC_NLA_STATS_RX_INFO, stats->recv_pkts}, {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, - {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_pkts}, {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, @@ -2075,9 +2079,9 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) goto attr_msg_full; if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, 0)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) goto attr_msg_full; prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index ed97a5876ebe..9e109bb1a207 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -455,14 +455,14 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, int i, applied_bef; state->probing = false; - if (!dlen) - return; /* Sanity check received domain record */ - if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) { - pr_warn_ratelimited("Received illegal domain record\n"); + if (dlen < dom_rec_len(arrv_dom, 0)) + return; + if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) + return; + if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) return; - } /* Synch generation numbers with peer if link just came up */ if (!state->synched) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9f5f3c3dab5..41f013888f07 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2015, Ericsson AB + * Copyright (c) 2001-2007, 2012-2016, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -129,54 +129,8 @@ static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; static struct proto tipc_proto; - static const struct rhashtable_params tsk_rht_params; -/* - * Revised TIPC socket locking policy: - * - * Most socket operations take the standard socket lock when they start - * and hold it until they finish (or until they need to sleep). Acquiring - * this lock grants the owner exclusive access to the fields of the socket - * data structures, with the exception of the backlog queue. A few socket - * operations can be done without taking the socket lock because they only - * read socket information that never changes during the life of the socket. - * - * Socket operations may acquire the lock for the associated TIPC port if they - * need to perform an operation on the port. If any routine needs to acquire - * both the socket lock and the port lock it must take the socket lock first - * to avoid the risk of deadlock. - * - * The dispatcher handling incoming messages cannot grab the socket lock in - * the standard fashion, since invoked it runs at the BH level and cannot block. - * Instead, it checks to see if the socket lock is currently owned by someone, - * and either handles the message itself or adds it to the socket's backlog - * queue; in the latter case the queued message is processed once the process - * owning the socket lock releases it. - * - * NOTE: Releasing the socket lock while an operation is sleeping overcomes - * the problem of a blocked socket operation preventing any other operations - * from occurring. However, applications must be careful if they have - * multiple threads trying to send (or receive) on the same socket, as these - * operations might interfere with each other. For example, doing a connect - * and a receive at the same time might allow the receive to consume the - * ACK message meant for the connect. While additional work could be done - * to try and overcome this, it doesn't seem to be worthwhile at the present. - * - * NOTE: Releasing the socket lock while an operation is sleeping also ensures - * that another operation that must be performed in a non-blocking manner is - * not delayed for very long because the lock has already been taken. - * - * NOTE: This code assumes that certain fields of a port/socket pair are - * constant over its lifetime; such fields can be examined without taking - * the socket lock and/or port lock, and do not need to be re-read even - * after resuming processing after waiting. These fields include: - * - socket type - * - pointer to socket sk structure (aka tipc_sock structure) - * - pointer to port structure - * - port reference - */ - static u32 tsk_own_node(struct tipc_sock *tsk) { return msg_prevnode(&tsk->phdr); @@ -232,7 +186,7 @@ static struct tipc_sock *tipc_sk(const struct sock *sk) static bool tsk_conn_cong(struct tipc_sock *tsk) { - return tsk->snt_unacked >= tsk->snd_win; + return tsk->snt_unacked > tsk->snd_win; } /* tsk_blocks(): translate a buffer size in bytes to number of diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 78cab9c5a445..b58dc95f3d35 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -697,6 +697,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, udp_conf.local_ip.s_addr = htonl(INADDR_ANY); udp_conf.use_udp_checksums = false; ub->ifindex = dev->ifindex; + if (tipc_mtu_bad(dev, sizeof(struct iphdr) + + sizeof(struct udphdr))) { + err = -EINVAL; + goto err; + } b->mtu = dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr); #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..2358f2690ec5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2199,7 +2199,8 @@ out: * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len) + struct sk_buff *last, unsigned int last_len, + bool freezable) { struct sk_buff *tail; DEFINE_WAIT(wait); @@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); - timeo = freezable_schedule_timeout(timeo); + if (freezable) + timeo = freezable_schedule_timeout(timeo); + else + timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) @@ -2250,7 +2254,8 @@ struct unix_stream_read_state { unsigned int splice_flags; }; -static int unix_stream_read_generic(struct unix_stream_read_state *state) +static int unix_stream_read_generic(struct unix_stream_read_state *state, + bool freezable) { struct scm_cookie scm; struct socket *sock = state->socket; @@ -2330,7 +2335,7 @@ again: mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, - last_len); + last_len, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, .flags = flags }; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, true); } static int unix_stream_splice_actor(struct sk_buff *skb, @@ -2503,7 +2508,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, flags & SPLICE_F_NONBLOCK) state.flags = MSG_DONTWAIT; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, false); } static int unix_shutdown(struct socket *sock, int mode) @@ -2812,7 +2817,8 @@ static int unix_seq_show(struct seq_file *seq, void *v) i++; } for ( ; i < len; i++) - seq_putc(seq, u->addr->name->sun_path[i]); + seq_putc(seq, u->addr->name->sun_path[i] ?: + '@'); } unix_state_unlock(s); seq_putc(seq, '\n'); diff --git a/net/wireless/core.h b/net/wireless/core.h index 08d2e948c9ad..f0c0c8a48c92 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -71,6 +71,7 @@ struct cfg80211_registered_device { struct list_head bss_list; struct rb_root bss_tree; u32 bss_generation; + u32 bss_entries; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; struct cfg80211_sched_scan_request __rcu *sched_scan_req; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b5bd58d0f731..35ad69fd0838 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -57,6 +57,19 @@ * also linked into the probe response struct. */ +/* + * Limit the number of BSS entries stored in mac80211. Each one is + * a bit over 4k at most, so this limits to roughly 4-5M of memory. + * If somebody wants to really attack this though, they'd likely + * use small beacons, and only one type of frame, limiting each of + * the entries to a much smaller size (in order to generate more + * entries in total, so overhead is bigger.) + */ +static int bss_entries_limit = 1000; +module_param(bss_entries_limit, int, 0644); +MODULE_PARM_DESC(bss_entries_limit, + "limit to number of scan BSS entries (per wiphy, default 1000)"); + #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) static void bss_free(struct cfg80211_internal_bss *bss) @@ -137,6 +150,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, list_del_init(&bss->list); rb_erase(&bss->rbn, &rdev->bss_tree); + rdev->bss_entries--; + WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list), + "rdev bss entries[%d]/list[empty:%d] corruption\n", + rdev->bss_entries, list_empty(&rdev->bss_list)); bss_ref_put(rdev, bss); return true; } @@ -163,6 +180,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev, rdev->bss_generation++; } +static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_internal_bss *bss, *oldest = NULL; + bool ret; + + lockdep_assert_held(&rdev->bss_lock); + + list_for_each_entry(bss, &rdev->bss_list, list) { + if (atomic_read(&bss->hold)) + continue; + + if (!list_empty(&bss->hidden_list) && + !bss->pub.hidden_beacon_bss) + continue; + + if (oldest && time_before(oldest->ts, bss->ts)) + continue; + oldest = bss; + } + + if (WARN_ON(!oldest)) + return false; + + /* + * The callers make sure to increase rdev->bss_generation if anything + * gets removed (and a new entry added), so there's no need to also do + * it here. + */ + + ret = __cfg80211_unlink_bss(rdev, oldest); + WARN_ON(!ret); + return ret; +} + void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message) { @@ -689,6 +740,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, const u8 *ie; int i, ssidlen; u8 fold = 0; + u32 n_entries = 0; ies = rcu_access_pointer(new->pub.beacon_ies); if (WARN_ON(!ies)) @@ -712,6 +764,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, /* This is the bad part ... */ list_for_each_entry(bss, &rdev->bss_list, list) { + /* + * we're iterating all the entries anyway, so take the + * opportunity to validate the list length accounting + */ + n_entries++; + if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) continue; if (bss->pub.channel != new->pub.channel) @@ -740,6 +798,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, new->pub.beacon_ies); } + WARN_ONCE(n_entries != rdev->bss_entries, + "rdev bss entries[%d]/list[len:%d] corruption\n", + rdev->bss_entries, n_entries); + return true; } @@ -894,7 +956,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } } + if (rdev->bss_entries >= bss_entries_limit && + !cfg80211_bss_expire_oldest(rdev)) { + kfree(new); + goto drop; + } + list_add_tail(&new->list, &rdev->bss_list); + rdev->bss_entries++; rb_insert_bss(rdev, new); found = new; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 5ea12afc7706..659b507b347d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1158,7 +1158,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) 58500000, 65000000, 78000000, - 0, + /* not in the spec, but some devices use this: */ + 86500000, }, { 13500000, 27000000, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fd6986634e6f..5bf7e1bfeac7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1268,12 +1268,14 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, policy_to_flow_dir(dir)); - if (!err && !xfrm_pol_hold_rcu(pol)) - goto again; - else if (err == -ESRCH) + if (!err) { + if (!xfrm_pol_hold_rcu(pol)) + goto again; + } else if (err == -ESRCH) { pol = NULL; - else + } else { pol = ERR_PTR(err); + } } else pol = NULL; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 08892091cfe3..671a1d0333f0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2450,7 +2450,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) #ifdef CONFIG_COMPAT if (in_compat_syscall()) - return -ENOTSUPP; + return -EOPNOTSUPP; #endif type = nlh->nlmsg_type; |